]> granicus.if.org Git - php/commitdiff
updated bundled oniguruma regex library to 5.9.2. fixed bug #42290.
author Rui Hirokawa <hirokawa@php.net>
Sat, 15 Oct 2011 08:55:53 +0000 (08:55 +0000)
committer Rui Hirokawa <hirokawa@php.net>
Sat, 15 Oct 2011 08:55:53 +0000 (08:55 +0000)
66 files changed:
ext/mbstring/oniguruma/COPYING
ext/mbstring/oniguruma/HISTORY
ext/mbstring/oniguruma/README
ext/mbstring/oniguruma/README.ja
ext/mbstring/oniguruma/doc/API
ext/mbstring/oniguruma/doc/API.ja
ext/mbstring/oniguruma/doc/FAQ
ext/mbstring/oniguruma/doc/FAQ.ja
ext/mbstring/oniguruma/doc/RE
ext/mbstring/oniguruma/doc/RE.ja
ext/mbstring/oniguruma/enc/ascii.c
ext/mbstring/oniguruma/enc/big5.c
ext/mbstring/oniguruma/enc/cp1251.c [new file with mode: 0644]
ext/mbstring/oniguruma/enc/euc_jp.c
ext/mbstring/oniguruma/enc/euc_kr.c
ext/mbstring/oniguruma/enc/euc_tw.c
ext/mbstring/oniguruma/enc/gb18030.c
ext/mbstring/oniguruma/enc/iso8859_1.c
ext/mbstring/oniguruma/enc/iso8859_10.c
ext/mbstring/oniguruma/enc/iso8859_11.c
ext/mbstring/oniguruma/enc/iso8859_13.c
ext/mbstring/oniguruma/enc/iso8859_14.c
ext/mbstring/oniguruma/enc/iso8859_15.c
ext/mbstring/oniguruma/enc/iso8859_16.c
ext/mbstring/oniguruma/enc/iso8859_2.c
ext/mbstring/oniguruma/enc/iso8859_3.c
ext/mbstring/oniguruma/enc/iso8859_4.c
ext/mbstring/oniguruma/enc/iso8859_5.c
ext/mbstring/oniguruma/enc/iso8859_6.c
ext/mbstring/oniguruma/enc/iso8859_7.c
ext/mbstring/oniguruma/enc/iso8859_8.c
ext/mbstring/oniguruma/enc/iso8859_9.c
ext/mbstring/oniguruma/enc/koi8.c
ext/mbstring/oniguruma/enc/koi8_r.c
ext/mbstring/oniguruma/enc/mktable.c
ext/mbstring/oniguruma/enc/sjis.c
ext/mbstring/oniguruma/enc/unicode.c
ext/mbstring/oniguruma/enc/utf16_be.c
ext/mbstring/oniguruma/enc/utf16_le.c
ext/mbstring/oniguruma/enc/utf32_be.c
ext/mbstring/oniguruma/enc/utf32_le.c
ext/mbstring/oniguruma/enc/utf8.c
ext/mbstring/oniguruma/index.html
ext/mbstring/oniguruma/index_ja.html [new file with mode: 0644]
ext/mbstring/oniguruma/onigposix.h
ext/mbstring/oniguruma/oniguruma.h
ext/mbstring/oniguruma/regcomp.c
ext/mbstring/oniguruma/regenc.c
ext/mbstring/oniguruma/regenc.h
ext/mbstring/oniguruma/regerror.c
ext/mbstring/oniguruma/regexec.c
ext/mbstring/oniguruma/regext.c
ext/mbstring/oniguruma/reggnu.c
ext/mbstring/oniguruma/regint.h
ext/mbstring/oniguruma/regparse.c
ext/mbstring/oniguruma/regparse.h
ext/mbstring/oniguruma/regposerr.c
ext/mbstring/oniguruma/regposix.c
ext/mbstring/oniguruma/regsyntax.c
ext/mbstring/oniguruma/regversion.c
ext/mbstring/oniguruma/st.c
ext/mbstring/oniguruma/testc.c [new file with mode: 0644]
ext/mbstring/oniguruma/testu.c [new file with mode: 0644]
ext/mbstring/oniguruma/win32/Makefile [new file with mode: 0644]
ext/mbstring/oniguruma/win32/testc.c [new file with mode: 0644]
ext/mbstring/tests/mb_eregi_replace.phpt

index 4d321bb93bb68221509b97afeea2d21546b8e54b..2cee0bbec82454706633df68a585af135f6cf2a1 100644 (file)
@@ -1,12 +1,8 @@
 Oniguruma LICENSE
 -----------------
 
-When this software is partly used or it is distributed with Ruby, 
-this of Ruby follows the license of Ruby.
-It follows the BSD license in the case of the one except for it.
-
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
index a1debefa490b509fc7b8c11ed616474fae4d5366..06f38c246a99909d95a1a51a531db2b605f0b4e9 100644 (file)
 History
 
-2007/08/16: Version 4.7.1
-
-2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
-2007/07/04: [spec] (thanks K.Takata)
+2010/01/09: Version 5.9.2
+
+2010/01/05: [bug]  fix utf16be_code_to_mbc() and utf16le_code_to_mbc().
+2008/09/16: [bug]  fix memory leaks in parse_exp().
+2008/08/01: [bug]  fix memory leaks.
+2008/06/17: [bug]  invalid type of argument was used
+                   in onig_st_lookup_strend().
+2008/06/16: [bug]  invalid CaseFoldMap entry in ISO-8859-5. 0xdf -> 0xde
+2008/02/19: [new]  add: onig_reg_init().
+2008/02/19: [new]  add: onig_free_body().
+2008/02/19: [new]  add: onig_new_without_alloc().
+2008/02/19: [API]  rename onig_alloc_init() to onig_reg_init(),
+                   and argument type changed.
+2008/01/31: [impl] move UTF16_IS_SURROGATE_XXX() to regenc.h.
+2008/01/30: [bug]  (thanks akr)
+                   fix euctw_islead().
+2008/01/23: [bug]  update enc/koi8.c.
+
+2007/12/22: Version 5.9.1
+
+2007/12/21: [impl] add sprint_byte().
+2007/11/28: [bug]  (thanks Andy Armstrong)
+                   don't overwrite error code in fetch_name().
+2007/11/12: [bug]  utf8 mbc length of code 0xfe, 0xff are not 1,
+2007/10/23: [spec] onig_enc_len() takes three arguments. (not used)
+2007/10/15: [impl] (thanks Rui Hirokawa)
+                   add check HAVE_STDARG_H.
+2007/09/07: [API]  rename enc_len() to onig_enc_len() in oniguruma.h.
+2007/09/04: [API]  remove ONIGENC_ERR_XXXXX.
+2007/09/03: [API]  add error ONIGERR_INVALID_CODE_POINT_VALUE.
+2007/09/03: [impl] change error message to "invalid code point value"
+                   for ONIGERR_INVALID_WIDE_CHAR_VALUE.
+2007/09/03: [bug]  xxx_code_to_mbclen() should return
+                   ONIGERR_INVALID_WIDE_CHAR_VALUE for invalid code point.
+                   ex. /[\x{7fffffff}]/ for ASCII encoding.
+2007/08/28: [impl] remove "warning: no previous declaration ...".
+2007/08/21: [impl] remove warnings in enc/mktable.c.
+2007/08/20: [impl] remove "warning: unused parameter"
+2007/08/20: [impl] remove "warning: comparison between signed and unsigned".
+2007/08/06: [impl] remove clear_not_flag_cclass().
+2007/08/03: [bug]  fix the case of undefined USE_NAMED_GROUP.
+2007/08/02: [spec] add backref by number.
+2007/08/01: [API]  add OnigCtype.
+2007/07/27: [spec] add USE_CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS.
+2007/07/24: [impl] define PLATFORM_UNALIGNED_WORD_ACCESS.
+2007/07/23: [dist] fix doc/FAQ.ja.
+
+2007/07/14: Version 5.9.0
+
+2007/07/13: [bug]  add check into onig_reduce_nested_quantifier().
+2007/06/26: [spec] (thanks K.Takata)
                    ONIG_OPTION_SINGLELINE: '$' -> '\Z'  (as Perl)
-2007/07/04: [dist] (thanks K.Takata)
+2007/06/26: [dist] (thanks K.Takata)
                    fix documents API and API.ja.
-
-2007/06/18: Version 4.7.0
-
-2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/06/19: [impl] remove IS_NOT_NULL() check before onig_node_free().
 2007/06/18: [bug]  (thanks KUBO Takehiro)
                    WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint).
-2007/06/05: [impl] add #ifndef vsnprintf in regint.h.
-2007/06/05: [bug]  should check USE_CRNL_AS_LINE_TERMINATOR case
+2007/06/18: [impl] rename CClassNode flags.
+2007/06/18: [bug]  initialization miss.
+2007/06/13: [impl] change node type reference NXXXX.
+2007/06/11: [impl] add node type bit.
+2007/06/11: [spec] allow anchor in enclosed repeater. /(\z)*/
+2007/06/11: [impl] rename node types.
+2007/06/08: [impl] remove OP_SET_OPTION_PUSH and OP_SET_OPTION from match_at().
+2007/06/07: [impl] use xvsnprintf().
+2007/06/06: [tune] don't set qn->next_head_exact for string first byte is zero.
+2007/06/06: [impl] remove unused variables.
+
+2007/06/04: Version 5.8.0
+
+2007/06/04: [impl] add #ifndef vsnprintf into regint.h.
+2007/05/31: [dist] add configure option '--enable-crnl-as-line-terminator'.
+2007/05/30: [dist] add sample/crnl.c.
+2007/05/30: [bug]  should check USE_CRNL_AS_LINE_TERMINATOR case
                    in onig_search().
+2007/05/29: [impl] move USE_CRNL_AS_LINE_TERMINATOR into regenc.h.
+2007/05/29: [impl] should check USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+                   in forward_search_range() and backward_search_range().
 
-2007/04/12: Version 4.6.2
+2007/04/27: Version 5.7.0
 
+2007/04/20: [spec] add config USE_MATCH_RANGE_IS_COMPLETE_RANGE.
+2007/04/20: [impl] refactoring in match_at().
+
+2007/04/12: Version 5.6.1
+
+2007/04/12: [bug]  must not use UChar in oniguruma.h.
 2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000
-                   to 0x4000.
+                   to 0x4000. [ruby-core:10883]
+
+2007/04/04: Version 5.6.0  (mourning for Hideo Takamatsu)
+
+2007/04/03: [spec] add new notation (?'name'), \k'name', \g'name'.
+2007/04/03: [impl] remove unused variable.
 2007/03/26: [impl] add 'void' to function declarations.
 
-2007/03/06: Version 4.6.1
+2007/03/06: Version 5.5.3
 
-2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
 2007/03/06: [bug]  add #include <malloc.h> for bcc32.
                    (In bcc32, alloca() is declared in malloc.h.)
-2007/03/06: [impl] remove including version.h of Ruby.
 2007/03/02: [bug]  invalid optimization for semi-end-buf in onig_search().
                    ex. /\n\Z/.match("aaaaaaaaaa\n")
 2007/03/02: [impl] move range > start check position in end_buf process.
 
-2007/02/08: Version 4.6.0
+2007/01/09: Version 5.5.2
 
-2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
-2007/01/09: [tune] select_opt_exact_info() didn't work for empty info.
+2007/01/09: [impl] rename USE_EXTERNAL_LOWER_CASE_CONV_TABLE.
+2007/01/05: [tune] select_opt_exact_info() didn't work for empty info.
                    ex. /.a/ make MAP info instead of EXACT info.
-2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode.
-2006/12/22: [spec] should check too short multibyte char in parse_exp().
-                   add USE_PAD_TO_SHORT_BYTE_CHAR.
-                   ex. /\x00/ in UTF16 should be error.
+2006/12/28: [impl] add print_enc_string() for ONIG_DEBUG mode.
+
+2006/12/22: Version 5.5.1
 
-2006/11/17: Version 4.5.1
+2006/12/22: [impl] rename ADD_PAD_TO_SHORT_BYTE_STRING
+                   to USE_PAD_TO_SHORT_BYTE_CHAR.
+2006/12/21: [spec] should check too short multibyte char in parse_exp().
+                   add ADD_PAD_TO_SHORT_BYTE_STRING.
+                   ex. /\x00/ in UTF16 should be error.
 
-2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
-2006/11/15: [impl] remove CHECK_INTERRUPT.
+2006/12/06: Version 5.5.0
+
+2006/12/05: [bug]  should add unfold-1 codes from folded code into
+                   onigenc_unicode_get_case_fold_codes_by_str().
+                   (ex. "S" -> "s" -> 0x017f)
+2006/12/05: [new]  add flag ONIGENC_CASE_FOLD_TURKISH_AZERI and
+                   USE_UNICODE_CASE_FOLD_TURKISH_AZERI. (disabled in default)
+2006/12/04: [spec] remove ONIGENC_CASE_FOLD_FULL.
+2006/11/30: [impl] remove unnecessary check in xxx_mbc_case_fold().
+
+2006/11/29: Version 5.4.0
+
+2006/11/28: [spec] INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR is enabled in
+                   default case fold status.
+2006/11/28: [spec] rename ONIGENC_CASE_FOLD_MULTI_CHAR to
+                   INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR.
+2006/11/28: [impl] remove USE_UNICODE_CASE_FOLD_MULTI_CHAR.
+2006/11/28: [impl] remove Fold[123]Table and add FoldTable.
+2006/11/27: [impl] change tool/unicode_fc.rb to see CaseFolding.txt.
+2006/11/24: [bug]  should call callback for to[j] <-> to[k] in
+                   onigenc_unicode_apply_all_case_fold().
+
+2006/11/22: Version 5.3.0
+
+2006/11/22: [dist] add index_ja.html.
+2006/11/22: [impl] undef ONIG_ESCAPE_UCHAR_COLLISION in regint.h and regenc.h.
+2006/11/21: [bug]  invalid array access.
+2006/11/21: [impl] escape UChar collision from config.h.
+2006/11/20: [new]  add Hiragana/Katakana properties into Shift_JIS.
+2006/11/20: [impl] fix CR_Katakana[] values in EUC-JP.
+2006/11/17: [impl] declare strend hash table functions in regint.h.
+2006/11/17: [impl] move property list functions to regenc.c.
+2006/11/17: [new]  add Hiragana/Katakana properties into EUC-JP.
+2006/11/15: [impl] remove NOT_RUBY from AM_CFLAGS.
+
+2006/11/14: Version 5.2.0
+
+2006/11/14: [impl] remove program codes for Ruby.
+2006/11/14: [impl] reduce program codes for Ruby.
 2006/11/10: [bug]  0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e
                    should be [:punct:].
+2006/11/09: [new]  (thanks Byte)
+                   add new character encoding CP1251.
 2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER.
-2006/11/07: [bug]  (thanks Byte)
-                   add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R.
 
-2006/11/06: Version 4.5.0
+2006/11/07: Version 5.1.0
 
-2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
-2006/11/06: [API]  remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND.
+2006/11/07: [dist] remove test.rb, testconv.rb and testconvu.rb.
+2006/11/07: [bug]  get_case_fold_codes_by_str() should handle 'Ss' and 'sS'
+                   combination for ess-tsett.
+2006/11/07: [impl] apply_all_case_fold() doesn't need to return all 
+                   case character combination for multi-character folding.
+                   (ONIGENC_CASE_FOLD_MULTI_CHAR)
+2006/11/07: [bug]  (thanks Byte)
+                   add { 0xa3, 0xb3 } to CaseFoldMap[] for KOI8-R.
 2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of 
                    the string range.
                    add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE.
-
-2006/10/30: Version 4.4.6
-
-2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/02: [impl] re-implement expand_case_fold_string() for
+                   ONIGENC_CASE_FOLD_MULTI_CHAR.
+2006/10/30: [impl] add NSTR_DONT_GET_OPTINFO flag.
 2006/10/30: [impl] (thanks K.Takata)
                    add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END.
 2006/10/30: [bug]  (thanks Wolfgang Nadasi-Donner)
                    invalid offset value was used in STATE_CHECK_BUFF_INIT().
-
-2006/10/24: Version 4.4.5
-
-2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
-2006/10/24: [impl] escape -Wall warning.
-2006/10/24: [tune] (thanks Kornelius Kalnbach)
+2006/10/27: [tune] speed up ONIGENC_MBC_CASE_FOLD() for UTF-16, UTF-32.
+                   (ASCII code check)
+2006/10/27: [tune] (thanks Kornelius Kalnbach)
                    String#scan for long string needs long time compare with
                    old Ruby
                    by initialization time for combination explosion check
                    ex. ("test " * 100_000).scan(/\w*\s?/)
                    change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000.
                    reduce initialization area of state_check_buff.
+2006/10/25: [impl] add DISABLE_CASE_FOLD_MULTI_CHAR().
+
+2006/10/23: Version 5.0.1
+
+2006/10/23: [bug]  should fold string in expand_case_fold_string().
+2006/10/23: [bug]  (thanks Km)
+                   too many case fold/unfold expansion problem.
+                   don't expand and set ambig flag to the string node.
+                   (except ONIGENC_CASE_FOLD_MULTI_CHAR).
+2006/10/23: [bug]  (thanks K.Takata)
+                   invalid \p{Alnum}, \p{ASCII}, [:alnum:], [:ascii:].
+                   fix OnigEncAsciiCtypeTable[] etc...
+2006/10/23: [spec] (thanks K.Takata)
+                   add [:word:] POSIX bracket.
+2006/10/23: [bug]  (thanks K.Takata)
+                   \p{Word} doesn't work.
+2006/10/20: [impl] don't expand for AMBIG_FLAG string in
+                   expand_case_fold_string().
+
+2006/10/19: Version 5.0.0
+
+2006/10/18: [bug]  ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM should be 13.
+2006/10/18: [impl] remove unused functions.
+2006/10/18: [dist] update documents.
+2006/10/18: [API]  move OnigMetaCharTableType to OnigSyntaxType.
+2006/10/18: [dev]  add tool/unicode_fc.rb, unicode_pc.rb.
+2006/10/18: [dist] remove MANIFEST-RUBY from distribution.
+2006/10/18: [bug]  return duplicated code in
+                   onigenc_unicode_get_case_fold_codes_by_str().
+2006/10/18: [API]  remove ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS.
+2006/10/18: [dev]  add tool/19.
+2006/10/18: [dist] remove target 19 from Makefile.am.
+2006/10/17: [dist] add enc/unicode.c to target 19 of win32/Makefile.
+2006/10/17: [impl] change type for escape VC++ warning.
+2006/10/17: [API]  rename ONIGENC_CASE_FOLD_NONE to ONIGENC_CASE_FOLD_MIN.
+2006/10/17: [dist] remove INSTALL-RUBY from distribution.
+2006/10/17: [dist] update LTVERSION to "2:0:0".
+2006/10/17: [impl] remove warnings for [make CFLAGS="-g -O2 -Wall"]
+                   in the case USE_UNICODE_PROPERTIES and
+                   USE_UNICODE_CASE_FOLD_MULTI_CHAR are undefined.
+2006/10/17: [impl] remove warnings for [make CFLAGS="-g -O2 -Wall"].
+2006/10/17: [impl] re-implement onigenc_unicode_apply_all_case_fold().
+                   multi-char by case folded char-class is treated as
+                   caseless-string (ambig flag on).
+                   enable OP_EXACT1_IC and OP_EXACTN_IC.
+2006/10/16: [bug]  unfold expand for 1->2, 1->3 folding in
+                   onigenc_unicode_apply_all_case_fold().
+                   add CaseFoldExpand_12[], CaseFoldExpand_13[].
 2006/10/16: [bug]  (thanks Akinori Musha)
                    first argument of rb_warn() should be format string.
-2006/10/10: [impl] add msa.state_check_buff_size initialization
+2006/10/16: [impl] add msa.state_check_buff_size initialization
                    in onig_search().
+2006/10/16: [spec] re-implement Unicode Caseless Match codes.
 2006/10/10: [bug]  should call onig_st_free_table() in
                    onig_free_shared_cclass_table().
+2006/10/10: [impl] remove OnigCompCaseFoldCodes.
+2006/10/10: [impl] remove onigenc_ascii_is_mbc_ambiguous() and
+                   onigenc_mbn_is_mbc_ambiguous().
+2006/10/10: [API]  remove is_mbc_ambiguous() member from OnigEncodingType.
+2006/10/10: [API]  rename onig_set_default_ambig_flag() to
+                   onig_set_default_case_fold_flag(),
+                   onig_get_default_ambig_flag() to
+                   onig_get_default_case_fold_flag(),
+                   onig_get_ambig_flag() to onig_get_case_fold_flag().
+2006/10/10: [API]  rename ambig_flag to case_fold_flag.
+2006/10/10: [API]  rename OnigAmbigType to OnigCaseFoldType.
+2006/10/10: [impl] rename ONIGENC_IS_CODE_SB_WORD() to IS_CODE_SB_WORD()
+                   and move to regint.h.
 2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB.
+2006/10/10: [impl] remove OP_EXACT1_IC and OP_EXACTN_IC from match_at().
+2006/10/10: [impl] should free new_str in expand_case_fold_string().
+2006/10/06: [dist] add test entries to sample/encode.c.
+2006/10/06: [impl] re-implement caseless match (case-fold).
+2006/10/06: [impl] expand string node by case fold variations.
+                   add expand_case_fold_string().
+2006/10/05: [spec] rename OnigCompAmbigCodeItem to OnigCaseFoldCodeItem.
+2006/10/05: [spec] add apply_all_case_fold() and get_case_fold_codes_by_str()
+                   to OnigEncodingType.
+2006/10/05: [spec] remove ambig_flag, get_all_pair_ambig_codes() and
+                   get_all_comp_ambig_codes() member from OnigEncodingType.
+2006/10/03: [impl] rename mbc_to_normalize() to mbc_case_fold().
+2006/10/03: [spec] rename ONIGENC_AMBIGUOUS_MATCH_XXX
+                   to ONIGENC_CASE_FOLD_XXX.
+                   rename ONIGENC_CASE_FOLD_COMPOUND
+                   to ONIGENC_CASE_FOLD_MULTI_CHAR.
+2006/10/02: [impl] remove all ONIG_RUBY_M17N part.
 2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT().
                    make valgrind happy.
-2006/09/22: [impl] convert to ascii for parameter string in
+2006/09/22: [impl] remove parse time ctype values (CTYPE_WORD etc...)
+2006/09/22: [ruby] enable USE_BACKREF_AT_LEVEL for Ruby mode.
+2006/09/22: [spec] (thanks Allan Odgaard)
+                   allow upper case letter as the first character
+                   of group name.
+                   fetch_name() and fetch_name_with_level()
+2006/09/21: [impl] convert to ascii for parameter string in
                    onig_error_code_to_str().
                    add enc member into OnigErrorInfo.
-
-2006/09/19: Version 4.4.4
-
-2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/21: [dist] update documents for Unicode Property.
+2006/09/21: [new]  add Unicode Properties. (enc/unicode.c)
+                   Any, Assigned, C, Cc, L, Lm, Arabic, Greek etc...
+2006/09/21: [impl] add USE_UNICODE_PROPERTIES into regenc.h.
+2006/09/21: [impl] remove USE_UNICODE_FULL_RANGE_CTYPE.
+2006/09/20: [impl] change ONIGENC_CTYPE_XXXX to sequential values.
+                   add BIT_CTYPE_XXXX bit flags to regenc.h.
+                   update XXXX_CtypeTable[] for BIT_CTYPE_ALNUM.
+2006/09/19: [memo] move from CVS to Subversion (1.3.2).
 2006/09/19: [impl] (thanks KOYAMA Tetsuji)
                    HAVE_STDARG_PROTOTYPES was not defined in Mac OS X
                    by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc...
-
-2006/09/15: Version 4.4.3
-
-2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
 2006/09/15: [bug]  (thanks Allan Odgaard)
                    out of range access in bm_search_notrev().
                    (p < s)
+2006/09/13: [impl] add ONIGENC_CTYPE_ENC_EXT flag.
+2006/09/13: [spec] remove 'Is' prefix check for property name
+                   from fetch_char_property_to_ctype().
+2006/09/13: [API]  add property_name_to_ctype member to OnigEncodingType.
+2006/09/12: [spec][ruby] add ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY and
+                   ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT to OnigSyntaxRuby.
 
 2006/09/08: Version 4.4.2
 
@@ -1808,8 +2013,17 @@ History
 [inst: changes for installation]
 [dist: distribution change]
 [test: test]
+[dev:  development]
 [memo: memo]
 --
+<create tag>
+svn copy file:///home/kosako/svnreps/svnrep_onig/trunk file:///home/kosako/svnreps/svnrep_onig/tags/5.0.0 -m "ADD TAG: 5.0.0"
+
+<set ignore files by .cvsignore>
+svn propset svn:ignore -F .cvsignore .
+svn commit -m "..."
+
+
 <CVS: show all tags>
 cvs history -T
 
@@ -1820,7 +2034,7 @@ cvs rtag "VERSION_X_X_X" oniguruma
 <GNU Autotools: bootstrap>
 * write Makefile.am and configure.in.
 > aclocal
-> libtoolize
+> libtoolize   or   glibtoolize
 > automake --foreign --add-missing
 > autoconf
 > configure --with-rubydir=... CFLAGS="-O2 -Wall"
index dff7fba5622517bc08ba0e48f0f245d4a9f1ac23..8390afd0508f5f877bba9ac3a0ed2f4f32892984 100644 (file)
@@ -1,9 +1,8 @@
-README  2007/06/18
+README  2007/05/31
 
 Oniguruma  ----   (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 
 http://www.geocities.jp/kosako3/oniguruma/
-http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 Oniguruma is a regular expressions library.
 The characteristic of this library is that different character encoding
@@ -13,16 +12,20 @@ Supported character encodings:
 
   ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
   EUC-JP, EUC-TW, EUC-KR, EUC-CN,
-  Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
+  Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
   ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
   ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
   ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
 
-* GB 18030: contributed by KUBO Takehiro
-* KOI8 is not included in library archive by default setup.
-  (need to edit Makefile if you want to use it.)
+* GB18030: contributed by KUBO Takehiro
+* CP1251:  contributed by Byte
 ------------------------------------------------------------
 
+License
+
+   BSD license.
+
+
 Install
 
  Case 1: Unix and Cygwin platform
@@ -63,14 +66,6 @@ Install
 
 
 
-License
-
-   When this software is partly used or it is distributed with Ruby, 
-   this of Ruby follows the license of Ruby.
-   It follows the BSD license in the case of the one except for it.
-
-
-
 Regular Expressions
 
   See doc/RE (or doc/RE.ja for Japanese).
@@ -108,7 +103,10 @@ Sample Programs
   sample/posix.c     POSIX API sample.
   sample/sql.c       example of the variable meta characters.
                      (SQL-like pattern matching)
+
+Test Programs
   sample/syntax.c    Perl, Java and ASIS syntax test.
+  sample/crnl.c      --enable-crnl-as-line-terminator test
 
 
 Source Files
@@ -145,9 +143,10 @@ Source Files
   enc/euc_kr.c       EUC-KR, EUC-CN encoding.
   enc/sjis.c         Shift_JIS encoding.
   enc/big5.c         Big5      encoding.
-  enc/gb18030.c      GB 18030  encoding  (contributed by KUBO Takehiro)
+  enc/gb18030.c      GB18030   encoding.
   enc/koi8.c         KOI8      encoding.
   enc/koi8_r.c       KOI8-R    encoding.
+  enc/cp1251.c       CP1251    encoding.
   enc/iso8859_1.c    ISO-8859-1  encoding. (Latin-1)
   enc/iso8859_2.c    ISO-8859-2  encoding. (Latin-2)
   enc/iso8859_3.c    ISO-8859-3  encoding. (Latin-3)
@@ -176,14 +175,15 @@ Source Files
 
 
 
-API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
-
-   + re_compile_fastmap() is removed.
-   + re_alloc_pattern() is added.
-
+ToDo
 
+  ? case fold flag: Katakana <-> Hiragana.
+  ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
+ ?? \X (== \PM\pM*)
+ ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
+ ?? transmission stopper. (return ONIG_STOP from match_at())
 
-I'm thankful to Akinori MUSHA.
+and I'm thankful to Akinori MUSHA.
 
 
 Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
index 2dee793caed8f0ff05fb460f78b5f0123d65ba8e..b14822c9e6254560f9c8f05da9800fc8e13844db 100644 (file)
@@ -1,9 +1,8 @@
-README.ja  2007/06/18
+README.ja  2007/05/31
 
 鬼車  ----   (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 
 http://www.geocities.jp/kosako3/oniguruma/
-http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 鬼車は正規表現ライブラリである。
 ¤³¤Î¥é¥¤¥Ö¥é¥ê¤ÎÆÃĹ¤Ï¡¢¤½¤ì¤¾¤ì¤ÎÀµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤´¤È¤Ë
@@ -13,16 +12,20 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
   ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
   EUC-JP, EUC-TW, EUC-KR, EUC-CN,
-  Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
+  Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
   ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
   ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
   ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
 
-* GB 18030: µ×ÊÝ·òÍλáÄó¶¡
-* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£
-  (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È)
+* GB18030: µ×ÊÝ·òÍλáÄó¶¡
+* CP1251:  Byte氏提供
 ------------------------------------------------------------
 
+ライセンス
+
+  BSDライセンスに従う。
+
+
 インストール
 
  ¥±¡¼¥¹£±: Unix¤ÈCygwin´Ä¶­
@@ -63,12 +66,6 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
    5. nmake ctest
 
 
-¥é¥¤¥»¥ó¥¹
-
-  ¤³¤Î¥½¥Õ¥È¥¦¥§¥¢¤¬Ruby¤È°ì½ï¤Ë»ÈÍѤޤ¿¤ÏÇÛÉÛ¤µ¤ì¤ë¾ì¹ç¤Ë¤Ï¡¢
-  Ruby¤Î¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£
-  ¤½¤ì°Ê³°¤Î¾ì¹ç¤Ë¤Ï¡¢BSD¥é¥¤¥»¥ó¥¹¤Ë½¾¤¦¡£
-
 
 Àµµ¬É½¸½
 
@@ -97,7 +94,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
   GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ
   ¤¤¤ì¤Ð¡¢»ÈÍѤǤ­¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£
   ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î
-  ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£
+  ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤¤¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£
 
 
   Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢
@@ -112,7 +109,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
   sample/listcap.c   Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã
   sample/posix.c     POSIX API»ÈÍÑÎã
   sample/sql.c       ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó)
+
+テストプログラム
   sample/syntax.c    Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È
+  sample/crnl.c      --enable-crnl-as-line-terminator ¥Æ¥¹¥È
 
 
 ¥½¡¼¥¹¥Õ¥¡¥¤¥ë
@@ -149,9 +149,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
   enc/euc_kr.c       EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/sjis.c         Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/big5.c         Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
-  enc/gb18030.c      GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡)
+  enc/gb18030.c      GB18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/koi8.c         KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/koi8_r.c       KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+  enc/cp1251.c       CP1251 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/iso8859_1.c    ISO-8859-1  (Latin-1)
   enc/iso8859_2.c    ISO-8859-2  (Latin-2)
   enc/iso8859_3.c    ISO-8859-3  (Latin-3)
@@ -180,13 +181,15 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 
 
-Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
-
-   + re_compile_fastmap()   ¤Ïºï½ü¤µ¤ì¤¿¡£
-   + re_alloc_pattern()     ¤¬Äɲ䵤줿¡£
+残件
 
+  ? case fold flag: Katakana <-> Hiragana
+  ? ONIG_OPTION_NOTBOS/NOTEOSÄɲà(\A, \z, \Z)
+ ?? \X (== \PM\pM*)
+ ?? Ê¸Ë¡Í×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ
+ ?? ¸¡º÷°ÌÃÖ°ÜÆ°Ää»ß±é»»»Ò (match_at()¤«¤éONIG_STOP¤òÊÖ¤¹)
 
-I'm thankful to Akinori MUSHA.
+and I'm thankful to Akinori MUSHA.
 
 
 ¥¢¥É¥ì¥¹: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
index 2f66287d499bc9ac54d6491237f59898f1d99125..f3b88756bc6a4bb93f147166c7371bc5a4a38013 100644 (file)
@@ -1,4 +1,4 @@
-Oniguruma API  Version 4.7.1  2007/07/04
+Oniguruma API  Version 5.9.2  2008/02/19
 
 #include <oniguruma.h>
 
@@ -105,10 +105,10 @@ Oniguruma API  Version 4.7.1  2007/07/04
       ONIG_ENCODING_EUC_KR        EUC-KR
       ONIG_ENCODING_EUC_CN        EUC-CN
       ONIG_ENCODING_SJIS          Shift_JIS
-      ONIG_ENCODING_KOI8          KOI8
       ONIG_ENCODING_KOI8_R        KOI8-R
+      ONIG_ENCODING_CP1251        CP1251
       ONIG_ENCODING_BIG5          Big5
-      ONIG_ENCODING_GB18030       GB 18030
+      ONIG_ENCODING_GB18030       GB18030
 
       or any OnigEncodingType data address defined by user.
 
@@ -134,6 +134,18 @@ Oniguruma API  Version 4.7.1  2007/07/04
 
 
 
+# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
+            const UChar* pattern_end,
+            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+            OnigErrorInfo* err_info)
+
+  Create a regex object.
+  reg object area is not allocated in this function.
+
+  normal return: ONIG_NORMAL
+
+
+
 # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
                       OnigCompileInfo* ci, OnigErrorInfo* einfo)
 
@@ -153,15 +165,12 @@ Oniguruma API  Version 4.7.1  2007/07/04
     ci->target_enc:      target string character encoding.
     ci->syntax:          address of pattern syntax definition.
     ci->option:          compile time option.
-    ci->ambig_flag:      character matching ambiguity bit flag for 
+    ci->case_fold_flag:  character matching case fold bit flag for 
                          ONIG_OPTION_IGNORECASE mode.
 
-       ONIGENC_AMBIGUOUS_MATCH_NONE:          exact
-       ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE:    ignore case for ASCII
-       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ignore case for non-ASCII
-       ONIGENC_AMBIGUOUS_MATCH_FULL:          all ambiguity on
-       ONIGENC_AMBIGUOUS_MATCH_DEFAULT:       (ASCII | NONASCII)
-                                              onig_set_default_ambig_flag()
+       ONIGENC_CASE_FOLD_MIN:           minimum
+       ONIGENC_CASE_FOLD_DEFAULT:       minimum
+                                        onig_set_default_case_fold_flag()
 
   5 err_info:    address for return optional error info.
                  Use this value as 3rd argument of onig_error_code_to_str().
@@ -188,6 +197,14 @@ Oniguruma API  Version 4.7.1  2007/07/04
   1 reg: regex object.
 
 
+# void onig_free_body(regex_t* reg)
+
+  Free memory used by regex object. (Except reg itself.)
+
+  arguments
+  1 reg: regex object.
+
+
 # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
                    const UChar* range, OnigRegion* region, OnigOptionType option)
 
@@ -202,8 +219,8 @@ Oniguruma API  Version 4.7.1  2007/07/04
   3 end:    terminate address of target string
   4 start:  search start address of target string
   5 range:  search terminate address of target string
-    in forward search  (start <= searched string head < range)
-    in backward search (range <= searched string head <= start)
+    in forward search  (start <= searched string < range)
+    in backward search (range <= searched string <= start)
   6 region: address for return group match range info (NULL is allowed)
   7 option: search time option
 
@@ -335,10 +352,10 @@ Oniguruma API  Version 4.7.1  2007/07/04
   1 reg:     regex object.
 
 
-# OnigEncoding    onig_get_encoding(regex_t* reg)
-# OnigOptionType  onig_get_options(regex_t* reg)
-# OnigAmbigType   onig_get_ambig_flag(regex_t* reg)
-# OnigSyntaxType* onig_get_syntax(regex_t* reg)
+# OnigEncoding     onig_get_encoding(regex_t* reg)
+# OnigOptionType   onig_get_options(regex_t* reg)
+# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
+# OnigSyntaxType*  onig_get_syntax(regex_t* reg)
 
   Return a value of the regex object.
 
@@ -518,7 +535,7 @@ Oniguruma API  Version 4.7.1  2007/07/04
   2 from: source address.
 
 
-# int onig_set_meta_char(OnigEncoding enc, unsigned int what,
+# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
                          OnigCodePoint code)
 
   Set a variable meta character to the code point value.
@@ -529,8 +546,8 @@ Oniguruma API  Version 4.7.1  2007/07/04
   normal return: ONIG_NORMAL
 
   arguments
-  1 enc:  target encoding
-  2 what: specifies which meta character it is.
+  1 syntax: target syntax
+  2 what:   specifies which meta character it is.
 
           ONIG_META_CHAR_ESCAPE
           ONIG_META_CHAR_ANYCHAR
@@ -542,16 +559,16 @@ Oniguruma API  Version 4.7.1  2007/07/04
   3 code: meta character or ONIG_INEFFECTIVE_META_CHAR.
 
 
-# OnigAmbigType onig_get_default_ambig_flag()
+# OnigCaseFoldType onig_get_default_case_fold_flag()
 
-  Get default ambig flag.
+  Get default case fold flag.
 
 
-# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
 
-  Set default ambig flag.
+  Set default case fold flag.
 
-  1 ambig_flag: ambiguity flag
+  1 case_fold_flag: case fold flag
 
 
 # unsigned int onig_get_match_stack_limit_size(void)
@@ -580,6 +597,6 @@ Oniguruma API  Version 4.7.1  2007/07/04
 
 # const char* onig_version(void)
 
-  Return version string.  (ex. "2.2.8")
+  Return version string.  (ex. "5.0.3")
 
 // END
index f2a8bd6f10d0c5eba42f7538a4794a25ab95f927..f681fa546089190dd35552972c87592b65eb8ea3 100644 (file)
@@ -1,4 +1,4 @@
-µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 4.7.1   2007/07/04
+µ´¼Ö¥¤¥ó¥¿¡¼¥Õ¥§¡¼¥¹ Version 5.9.2   2008/02/19
 
 #include <oniguruma.h>
 
       ONIG_ENCODING_EUC_KR        EUC-KR
       ONIG_ENCODING_EUC_CN        EUC-CN
       ONIG_ENCODING_SJIS          Shift_JIS
-      ONIG_ENCODING_KOI8          KOI8
       ONIG_ENCODING_KOI8_R        KOI8-R
+      ONIG_ENCODING_CP1251        CP1251
       ONIG_ENCODING_BIG5          Big5
-      ONIG_ENCODING_GB18030       GB 18030
+      ONIG_ENCODING_GB18030       GB18030
 
       ¤Þ¤¿¤Ï¡¢¥æ¡¼¥¶¤¬ÄêµÁ¤·¤¿OnigEncodingType¥Ç¡¼¥¿¤Î¥¢¥É¥ì¥¹
 
               onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë
 
 
+
+# int onig_new_without_alloc(regex_t* reg, const UChar* pattern,
+            const UChar* pattern_end,
+            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
+            OnigErrorInfo* err_info)
+
+  Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È(regex)¤òºîÀ®¤¹¤ë¡£
+  reg¤ÎÎΰè¤òÆâÉô¤Ç³ä¤êÅö¤Æ¤Ê¤¤¡£
+
+  Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
+
+
+
 # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
                       OnigCompileInfo* ci, OnigErrorInfo* einfo)
 
     ci->target_enc:      ÂоÝʸ»úÎó¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
     ci->syntax:          Àµµ¬É½¸½¥Ñ¥¿¡¼¥óʸˡÄêµÁ
     ci->option:          Àµµ¬É½¸½¥³¥ó¥Ñ¥¤¥ë»þ¥ª¥×¥·¥ç¥ó
-    ci->ambig_flag:      ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î
+    ci->case_fold_flag:  ONIG_OPTION_IGNORECASE¥â¡¼¥É¤Ç¤Î
                          Ê¸»úÛ£Ëæ¥Þ¥Ã¥Á»ØÄê¥Ó¥Ã¥È¥Õ¥é¥°
 
-       ONIGENC_AMBIGUOUS_MATCH_NONE:          Û£ËæÌµ¤·
-       ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE:    ASCII¤ÎÂçʸ»ú¾®Ê¸»ú
-       ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE: ASCII°Ê³°¤ÎÂçʸ»ú¾®Ê¸»ú
-       ONIGENC_AMBIGUOUS_MATCH_FULL:          Á´¤Æ¤ÎÛ£Ëæ¥Õ¥é¥°Í­¸ú
-       ONIGENC_AMBIGUOUS_MATCH_DEFAULT:       (ASCII | NONASCII)
-                                              onig_set_default_ambig_flag()
+       ONIGENC_CASE_FOLD_MIN:           ºÇ¾®
+       ONIGENC_CASE_FOLD_DEFAULT:       ºÇ¾®
+                                        onig_set_default_case_fold_flag()
 
   5 err_info:    ¥¨¥é¡¼¾ðÊó¤òÊÖ¤¹¤¿¤á¤Î¥¢¥É¥ì¥¹
                  onig_error_code_to_str()¤Î»°ÈÖÌܤΰú¿ô¤È¤·¤Æ»ÈÍѤ¹¤ë
   1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
 
 
+# void onig_free_body(regex_t* reg)
+
+  Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤Î¥á¥â¥ê¤ò²òÊü¤¹¤ë¡£(reg¼«¿È¤ÎÎΰè¤ò½ü¤¤¤Æ)
+
+  °ú¿ô
+  1 reg: Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
+
+
 
 # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start,
                    const UChar* range, OnigRegion* region, OnigOptionType option)
   1 reg:    Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
   2 str:    ¸¡º÷ÂоÝʸ»úÎó
   3 end:    ¸¡º÷ÂоÝʸ»úÎó¤Î½ªÃ¼¥¢¥É¥ì¥¹
-  4 start:  ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ³«»Ï¥¢¥É¥ì¥¹
-  5 range:  ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ½ªÃ¼¥¢¥É¥ì¥¹
-    Á°Êýõº÷  (start <= Ãµº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ < range)
-    ¸åÊýõº÷  (range <= Ãµº÷¤µ¤ì¤ëʸ»úÎó¤ÎÀèÆ¬ <= start)
+  4 start:  ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷ÀèÆ¬°ÌÃÖ¥¢¥É¥ì¥¹
+  5 range:  ¸¡º÷ÂоÝʸ»úÎó¤Î¸¡º÷½ªÎ»°ÌÃÖ¥¢¥É¥ì¥¹
+    Á°Êýõº÷  (start <= Ãµº÷¤µ¤ì¤ëʸ»úÎó < range)
+    ¸åÊýõº÷  (range <= Ãµº÷¤µ¤ì¤ëʸ»úÎó <= start)
   6 region: ¥Þ¥Ã¥ÁÎΰè¾ðÊó(region)  (NULL¤âµö¤µ¤ì¤ë)
   7 option: ¸¡º÷»þ¥ª¥×¥·¥ç¥ó
 
   1 reg:    Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È
 
 
-# OnigEncoding    onig_get_encoding(regex_t* reg)
-# OnigOptionType  onig_get_options(regex_t* reg)
-# OnigAmbigType   onig_get_ambig_flag(regex_t* reg)
-# OnigSyntaxType* onig_get_syntax(regex_t* reg)
+# OnigEncoding     onig_get_encoding(regex_t* reg)
+# OnigOptionType   onig_get_options(regex_t* reg)
+# OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg)
+# OnigSyntaxType*  onig_get_syntax(regex_t* reg)
 
   Àµµ¬É½¸½¥ª¥Ö¥¸¥§¥¯¥È¤ËÂФ·¤Æ¡¢Âбþ¤¹¤ëÃͤòÊÖ¤¹¡£
 
   2 from: ¸µ
 
 
-# int onig_set_meta_char(OnigEncoding enc, unsigned int what,
+# int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what,
                          OnigCodePoint code)
 
   ¥á¥¿Ê¸»ú¤ò»ØÄꤷ¤¿¥³¡¼¥É¥Ý¥¤¥ó¥ÈÃͤ˥»¥Ã¥È¤¹¤ë¡£
   Àµ¾ï½ªÎ»Ìá¤êÃÍ: ONIG_NORMAL
 
   °ú¿ô
-  1 enc:  ÂоÝʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
-  2 what: ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê
+  1 syntax: ÂоÝʸˡ
+  2 what:   ¥á¥¿Ê¸»úµ¡Ç½¤Î»ØÄê
 
           ONIG_META_CHAR_ESCAPE
           ONIG_META_CHAR_ANYCHAR
   3 code: ¥á¥¿Ê¸»ú¤Î¥³¡¼¥É¥Ý¥¤¥ó¥È ¤Þ¤¿¤Ï ONIG_INEFFECTIVE_META_CHAR.
 
 
-# OnigAmbigType onig_get_default_ambig_flag()
+# OnigCaseFoldType onig_get_default_case_fold_flag()
 
-  ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£
+  ¥Ç¥Õ¥©¥ë¥È¤Îcase fold¥Õ¥é¥°¤ò¼èÆÀ¤¹¤ë¡£
 
 
-# int onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+# int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
 
-  ¥Ç¥Õ¥©¥ë¥È¤ÎÛ£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£
+  ¥Ç¥Õ¥©¥ë¥È¤Îcase fold¥Õ¥é¥°¤ò¥»¥Ã¥È¤¹¤ë¡£
 
   °ú¿ô
-  1 ambig_flag: Û£Ëæ¥Þ¥Ã¥Á¥Õ¥é¥°
+  1 case_fold_flag: case fold¥Õ¥é¥°
 
 
 # unsigned int onig_get_match_stack_limit_size(void)
 
 # const char* onig_version(void)
 
-  ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "2.2.8")
+  ¥Ð¡¼¥¸¥ç¥óʸ»úÎó¤òÊÖ¤¹¡£(Îã "5.0.3")
 
 // END
index dccf242c8d5b4976a81d7c8e2e92e6c57de26404..46a3e0e08b7d86a8fd40ceea7235d922ab6ce20d 100644 (file)
@@ -1,4 +1,4 @@
-FAQ    2006/10/30
+FAQ    2006/11/14
 
 1. Longest match
 
@@ -12,7 +12,7 @@ FAQ    2006/10/30
 
    (A) Oniguruma Layer
 
-       Define the macro below at NOT_RUBY case in oniguruma/regint.h.
+       Define the macro below in oniguruma/regint.h.
 
        USE_MULTI_THREAD_SYSTEM
        THREAD_ATOMIC_START
index 5582765ee6db1c0d61fd15702a9a8cd610e5a830..1d65f9fb62753668353969fd76734381ad8cd543 100644 (file)
@@ -1,4 +1,4 @@
-FAQ    2006/10/30
+FAQ    2007/07/23
 
 1. ºÇĹ¥Þ¥Ã¥Á
 
@@ -13,7 +13,7 @@ FAQ    2006/10/30
 
    (A) Oniguruma Layer
 
-       oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤ÎÉôʬ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£
+       oniguruma/regint.h¤ÎÃæ¤Î°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤¹¤ë¡£
 
        USE_MULTI_THREAD_SYSTEM
        THREAD_ATOMIC_START
@@ -35,7 +35,16 @@ FAQ    2006/10/30
    "¥¹¥ì¥Ã¥É¥»¡¼¥Õ¤Ë´Ø¤¹¤ëÊä­"¤Ë½ñ¤¤¤Æ¤ª¤¤¤¿¡£
 
 
-3. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È
+3. CR + LF
+
+   DOSの改行(CR(0x0d) + LF(0x0a)の連続)
+
+   regenc.h¤ÎÃæ¤Î¡¢°Ê²¼¤ÎÉôʬ¤òÍ­¸ú¤Ë¤¹¤ë¡£
+
+     /* #define USE_CRNL_AS_LINE_TERMINATOR */
+
+
+4. ¥á¡¼¥ê¥ó¥°¥ê¥¹¥È
 
    µ´¼Ö¤Ë´Ø¤¹¤ë¥á¡¼¥ê¥ó¥°¥ê¥¹¥È¤Ï¸ºß¤·¤Ê¤¤¡£
 
@@ -59,8 +68,7 @@ Oniguruma
 
 (A) Oniguruma¤ÎÃæ¤ÇÂбþ¤¹¤ë¾ì¹ç
 
-oniguruma/regint.h¤ÎÃæ¤ÎNOT_RUBY¤Ç°Ï¤Þ¤ì¤Æ¤¤¤ëÉôʬ¤ÎÃæ¤Ç
-°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£
+oniguruma/regint.h¤ÎÃæ¤Ç°Ê²¼¤Î¥Þ¥¯¥í¤òÄêµÁ¤·¤ÆºÆ¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤¯¤À¤µ¤¤¡£
 
 USE_MULTI_THREAD_SYSTEM
 
index 5a2783d167d22a4871ca01aed3426256dd5bd08c..21efe531a46c5832e33d2825d936e07cbec99f87 100644 (file)
@@ -1,4 +1,4 @@
-Oniguruma Regular Expressions Version 4.3.0    2006/08/17
+Oniguruma Regular Expressions Version 5.9.1    2007/09/05
 
 syntax: ONIG_SYNTAX_RUBY (default)
 
@@ -70,6 +70,38 @@ syntax: ONIG_SYNTAX_RUBY (default)
   \H       non hexadecimal digit char
 
 
+  Character Property
+
+    * \p{property-name}
+    * \p{^property-name}    (negative)
+    * \P{property-name}     (negative)
+
+    property-name:
+
+     + works on all encodings
+       Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
+       Print, Punct, Space, Upper, XDigit, Word, ASCII,
+
+     + works on EUC_JP, Shift_JIS
+       Hiragana, Katakana
+
+     + works on UTF8, UTF16, UTF32
+       Any, Assigned, C, Cc, Cf, Cn, Co, Cs, L, Ll, Lm, Lo, Lt, Lu,
+       M, Mc, Me, Mn, N, Nd, Nl, No, P, Pc, Pd, Pe, Pf, Pi, Po, Ps,
+       S, Sc, Sk, Sm, So, Z, Zl, Zp, Zs, 
+       Arabic, Armenian, Bengali, Bopomofo, Braille, Buginese,
+       Buhid, Canadian_Aboriginal, Cherokee, Common, Coptic,
+       Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian,
+       Glagolitic, Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul,
+       Hanunoo, Hebrew, Hiragana, Inherited, Kannada, Katakana,
+       Kharoshthi, Khmer, Lao, Latin, Limbu, Linear_B, Malayalam,
+       Mongolian, Myanmar, New_Tai_Lue, Ogham, Old_Italic, Old_Persian,
+       Oriya, Osmanya, Runic, Shavian, Sinhala, Syloti_Nagri, Syriac,
+       Tagalog, Tagbanwa, Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan,
+       Tifinagh, Ugaritic, Yi
+
+
+
 4. Quantifier
 
   greedy
@@ -111,11 +143,7 @@ syntax: ONIG_SYNTAX_RUBY (default)
   \A      beginning of string
   \Z      end of string, or before newline at the end
   \z      end of string
-  \G      matching start position (*)
-
-          * Ruby Regexp:
-                 previous end-of-match position
-                (This specification is not related to this library.)
+  \G      matching start position 
 
 
 6. Character class
@@ -135,40 +163,43 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
     Not Unicode Case:
 
-    alnum    alphabet or digit char
-    alpha    alphabet
-    ascii    code value: [0 - 127]
-    blank    \t, \x20
-    cntrl
-    digit    0-9
-    graph    include all of multibyte encoded characters
-    lower
-    print    include all of multibyte encoded characters
-    punct
-    space    \t, \n, \v, \f, \r, \x20
-    upper
-    xdigit   0-9, a-f, A-F
+      alnum    alphabet or digit char
+      alpha    alphabet
+      ascii    code value: [0 - 127]
+      blank    \t, \x20
+      cntrl
+      digit    0-9
+      graph    include all of multibyte encoded characters
+      lower
+      print    include all of multibyte encoded characters
+      punct
+      space    \t, \n, \v, \f, \r, \x20
+      upper
+      xdigit   0-9, a-f, A-F
+      word     alphanumeric, "_" and multibyte characters
 
 
     Unicode Case:
 
-    alnum    Letter | Mark | Decimal_Number
-    alpha    Letter | Mark
-    ascii    0000 - 007F
-    blank    Space_Separator | 0009
-    cntrl    Control | Format | Unassigned | Private_Use | Surrogate
-    digit    Decimal_Number
-    graph    [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
-    lower    Lowercase_Letter
-    print    [[:graph:]] | [[:space:]]
-    punct    Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
-             Final_Punctuation | Initial_Punctuation | Other_Punctuation |
-             Open_Punctuation
-    space    Space_Separator | Line_Separator | Paragraph_Separator |
-             0009 | 000A | 000B | 000C | 000D | 0085
-    upper    Uppercase_Letter
-    xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066
-             (0-9, a-f, A-F)
+      alnum    Letter | Mark | Decimal_Number
+      alpha    Letter | Mark
+      ascii    0000 - 007F
+      blank    Space_Separator | 0009
+      cntrl    Control | Format | Unassigned | Private_Use | Surrogate
+      digit    Decimal_Number
+      graph    [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
+      lower    Lowercase_Letter
+      print    [[:graph:]] | [[:space:]]
+      punct    Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
+               Final_Punctuation | Initial_Punctuation | Other_Punctuation |
+               Open_Punctuation
+      space    Space_Separator | Line_Separator | Paragraph_Separator |
+               0009 | 000A | 000B | 000C | 000D | 0085
+      upper    Uppercase_Letter
+      xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066
+               (0-9, a-f, A-F)
+      word     Letter | Mark | Decimal_Number | Connector_Punctuation
+
 
 
 7. Extended groups
@@ -200,9 +231,9 @@ syntax: ONIG_SYNTAX_RUBY (default)
   (?>subexp)         atomic group
                      don't backtrack in subexp.
 
-  (?<name>subexp)    define named group
-                     (All characters of the name must be a word character.
-                     And first character must not be a digit or uppper case)
+  (?<name>subexp), (?'name'subexp)
+                     define named group
+                     (All characters of the name must be a word character.)
 
                      Not only a name but a number is assigned like a captured
                      group.
@@ -215,7 +246,12 @@ syntax: ONIG_SYNTAX_RUBY (default)
 8. Back reference
 
   \n          back reference by group number (n >= 1)
+  \k<n>       back reference by group number (n >= 1)
+  \k'n'       back reference by group number (n >= 1)
+  \k<-n>      back reference by relative group number (n >= 1)
+  \k'-n'      back reference by relative group number (n >= 1)
   \k<name>    back reference by group name
+  \k'name'    back reference by group name
 
   In the back reference by the multiplex definition name,
   a subexp with a large number is referred to preferentially.
@@ -227,10 +263,17 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
   back reference with nest level
 
-    (This function is disabled in Ruby 1.9.)
+    level: 0, 1, 2, ...
 
-    \k<name+n>     n: 0, 1, 2, ...
-    \k<name-n>     n: 0, 1, 2, ...
+    \k<n+level>     (n >= 1)
+    \k<n-level>     (n >= 1)
+    \k'n+level'     (n >= 1)
+    \k'n-level'     (n >= 1)
+
+    \k<name+level>
+    \k<name-level>
+    \k'name+level'
+    \k'name-level'
 
     Designate the nest level relative to the back reference position.
 
@@ -256,7 +299,11 @@ syntax: ONIG_SYNTAX_RUBY (default)
 9. Subexp call ("Tanaka Akira special")
 
   \g<name>    call by group name
+  \g'name'    call by group name
   \g<n>       call by group number (n >= 1)
+  \g'n'       call by group number (n >= 1)
+  \g<-n>      call by relative group number (n >= 1)
+  \g'-n'      call by relative group number (n >= 1)
 
   * left-most recursive call is not allowed.
      ex. (?<name>a|\g<name>b)   => error
@@ -300,7 +347,6 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
   ('g' and 'G' options are discussed on the ruby-dev ML)
 
-  These options are not implemented in Ruby level.
 
 
 -----------------------------
@@ -317,14 +363,13 @@ A-1. Syntax depend options
 A-2. Original extensions
 
    + hexadecimal digit char type  \h, \H
-   + named group                  (?<name>...)
+   + named group                  (?<name>...), (?'name'...)
    + named backref                \k<name>
    + subexp call                  \g<name>, \g<group-num>
 
 
 A-3. Lacked features compare with perl 5.8.0
 
-   + [:word:]
    + \N{name}
    + \l,\u,\L,\U, \X, \C
    + (?{code})
@@ -334,20 +379,10 @@ A-3. Lacked features compare with perl 5.8.0
    * \Q...\E
      This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
 
-   * \p{property}, \P{property}
-     This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
-     Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
-     Print, Punct, Space, Upper, XDigit, ASCII are supported.
-
-     Prefix 'Is' of property name is allowed in ONIG_SYNTAX_PERL only.
-     ex. \p{IsXDigit}.
-
-     Negation operator of property is supported in ONIG_SYNTAX_PERL only.
-     \p{^...}, \P{^...}
 
+A-4. Differences with Japanized GNU regex(version 0.12) of Ruby 1.8
 
-A-4. Differences with Japanized GNU regex(version 0.12) of Ruby
-
+   + add character property (\p{property}, \P{property})
    + add hexadecimal digit char type (\h, \H)
    + add look-behind
      (?<=fixed-char-length-pattern), (?<!fixed-char-length-pattern)
@@ -401,7 +436,9 @@ A-5. Disabled functions by default syntax
 
 A-6. Problems
 
-   + Invalid encoding byte sequence is not checked in UTF-8.
+   + Invalid encoding byte sequence is not checked.
+
+     ex. UTF-8
 
      * Invalid first byte is treated as a character.
        /./u =~ "\xa3"
index 51681715c4e38b931468088519c04495137f500c..abde849fe4547f13b05829f785bc8a67387d1eb9 100644 (file)
@@ -1,4 +1,4 @@
-µ´¼Ö Àµµ¬É½¸½ Version 4.3.0    2006/08/17
+µ´¼Ö Àµµ¬É½¸½ Version 5.9.1    2007/09/05
 
 »ÈÍÑʸˡ: ONIG_SYNTAX_RUBY (´ûÄêÃÍ)
 
   \H       Èó16¿Ê¿ô»ú
 
 
+  Character Property
+
+    * \p{property-name}
+    * \p{^property-name}    (negative)
+    * \P{property-name}     (negative)
+
+    property-name:
+
+     + Á´¤Æ¤Î¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¤ÇÍ­¸ú
+       Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
+       Print, Punct, Space, Upper, XDigit, Word, ASCII,
+
+     + EUC-JP, Shift_JIS¤ÇÍ­¸ú
+       Hiragana, Katakana
+
+     + UTF8, UTF16, UTF32¤ÇÍ­¸ú
+       Any, Assigned, C, Cc, Cf, Cn, Co, Cs, L, Ll, Lm, Lo, Lt, Lu,
+       M, Mc, Me, Mn, N, Nd, Nl, No, P, Pc, Pd, Pe, Pf, Pi, Po, Ps,
+       S, Sc, Sk, Sm, So, Z, Zl, Zp, Zs, 
+       Arabic, Armenian, Bengali, Bopomofo, Braille, Buginese,
+       Buhid, Canadian_Aboriginal, Cherokee, Common, Coptic,
+       Cypriot, Cyrillic, Deseret, Devanagari, Ethiopic, Georgian,
+       Glagolitic, Gothic, Greek, Gujarati, Gurmukhi, Han, Hangul,
+       Hanunoo, Hebrew, Hiragana, Inherited, Kannada, Katakana,
+       Kharoshthi, Khmer, Lao, Latin, Limbu, Linear_B, Malayalam,
+       Mongolian, Myanmar, New_Tai_Lue, Ogham, Old_Italic, Old_Persian,
+       Oriya, Osmanya, Runic, Shavian, Sinhala, Syloti_Nagri, Syriac,
+       Tagalog, Tagbanwa, Tai_Le, Tamil, Telugu, Thaana, Thai, Tibetan,
+       Tifinagh, Ugaritic, Yi
+
+
 
 4. ÎÌ»ØÄê»Ò
 
   \A      Ê¸»úÎóÀèÆ¬
   \Z      Ê¸»úÎóËöÈø¡¢¤Þ¤¿¤Ïʸ»úÎóËöÈø¤Î²þ¹Ô¤ÎľÁ°
   \z      Ê¸»úÎóËöÈø
-  \G      ¾È¹ç³«»Ï°ÌÃÖ(*)
-
-          * Ruby Regexp:
-                 Á°²ó¾È¹çÀ®¸ùËöÈø°ÌÃÖ
-                (¤³¤Î»ÅÍͤÏRuby¤Î¼ÂÁõ¤Ë´Ø¤¹¤ë¤â¤Î¤Ç¤¢¤ê¡¢
-                 Àµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤È¤Ï̵´Ø·¸)
+  \G      ¾È¹ç³«»Ï°ÌÃÖ
 
 
 6. Ê¸»ú½¸¹ç
 
     Unicode°Ê³°¤Î¾ì¹ç:
 
-    alnum    ±Ñ¿ô»ú
-    alpha    ±Ñ»ú
-    ascii    0 - 127
-    blank    \t, \x20
-    cntrl
-    digit    0-9
-    graph    Â¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
-    lower
-    print    Â¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
-    punct
-    space    \t, \n, \v, \f, \r, \x20
-    upper
-    xdigit   0-9, a-f, A-F
+      alnum    ±Ñ¿ô»ú
+      alpha    ±Ñ»ú
+      ascii    0 - 127
+      blank    \t, \x20
+      cntrl
+      digit    0-9
+      graph    Â¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
+      lower
+      print    Â¿¥Ð¥¤¥Èʸ»úÁ´Éô¤ò´Þ¤à
+      punct
+      space    \t, \n, \v, \f, \r, \x20
+      upper
+      xdigit   0-9, a-f, A-F
+      word     ±Ñ¿ô»ú, "_" ¤ª¤è¤Ó Â¿¥Ð¥¤¥Èʸ»ú
 
     Unicode¤Î¾ì¹ç:
 
-    alnum    Letter | Mark | Decimal_Number
-    alpha    Letter | Mark
-    ascii    0000 - 007F
-    blank    Space_Separator | 0009
-    cntrl    Control | Format | Unassigned | Private_Use | Surrogate
-    digit    Decimal_Number
-    graph    [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
-    lower    Lowercase_Letter
-    print    [[:graph:]] | [[:space:]]
-    punct    Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
-             Final_Punctuation | Initial_Punctuation | Other_Punctuation |
-             Open_Punctuation
-    space    Space_Separator | Line_Separator | Paragraph_Separator |
-             0009 | 000A | 000B | 000C | 000D | 0085
-    upper    Uppercase_Letter
-    xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066
-             (0-9, a-f, A-F)
+      alnum    Letter | Mark | Decimal_Number
+      alpha    Letter | Mark
+      ascii    0000 - 007F
+      blank    Space_Separator | 0009
+      cntrl    Control | Format | Unassigned | Private_Use | Surrogate
+      digit    Decimal_Number
+      graph    [[:^space:]] && ^Control && ^Unassigned && ^Surrogate
+      lower    Lowercase_Letter
+      print    [[:graph:]] | [[:space:]]
+      punct    Connector_Punctuation | Dash_Punctuation | Close_Punctuation |
+               Final_Punctuation | Initial_Punctuation | Other_Punctuation |
+               Open_Punctuation
+      space    Space_Separator | Line_Separator | Paragraph_Separator |
+               0009 | 000A | 000B | 000C | 000D | 0085
+      upper    Uppercase_Letter
+      xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066
+               (0-9, a-f, A-F)
+      word     Letter | Mark | Decimal_Number | Connector_Punctuation
+
 
 
 7. ³ÈÄ¥¼°½¸¹ç
   (?>¼°)            ¸¶»ÒŪ¼°½¸¹ç
                     ¼°Á´ÂΤòÄ̲ᤷ¤¿¤È¤­¡¢¼°¤ÎÃæ¤Ç¤Î¸åÂàºÆ»î¹Ô¤ò¹Ô¤Ê¤ï¤Ê¤¤
 
-  (?<name>¼°)       Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç
+  (?<name>¼°), (?'name'¼°)
+                    Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç
                     ¼°½¸¹ç¤Ë̾Á°¤ò³ä¤êÅö¤Æ¤ë(ÄêµÁ¤¹¤ë)¡£
-                    (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£ºÇ½é¤Îʸ»ú¤Ï
-                     ±ÑÂçʸ»ú¤Ç¤¢¤Ã¤Æ¤Ï¤¤¤±¤Ê¤¤¡£)
+                    (̾Á°¤Ïñ¸ì¹½À®Ê¸»ú¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£)
 
                     Ì¾Á°¤À¤±¤Ç¤Ê¤¯¡¢Êá³Í¼°½¸¹ç¤ÈƱÍͤËÈÖ¹æ¤â³ä¤êÅö¤Æ¤é¤ì¤ë¡£
                     ÈÖ¹æ»ØÄ꤬¶Ø»ß¤µ¤ì¤Æ¤¤¤Ê¤¤¾õÂÖ (10. Êá³Í¼°½¸¹ç ¤ò»²¾È)
 
 8. ¸åÊý»²¾È
 
-  \n          ÈÖ¹æ»ØÄ껲¾È (n >= 1)
+  \n          ÈÖ¹æ»ØÄ껲¾È     (n >= 1)
+  \k<n>       ÈÖ¹æ»ØÄ껲¾È     (n >= 1)
+  \k'n'       ÈÖ¹æ»ØÄ껲¾È     (n >= 1)
+  \k<-n>      ÁêÂÐÈÖ¹æ»ØÄ껲¾È (n >= 1)
+  \k'-n'      ÁêÂÐÈÖ¹æ»ØÄ껲¾È (n >= 1)
   \k<name>    Ì¾Á°»ØÄ껲¾È
+  \k'name'    Ì¾Á°»ØÄ껲¾È
 
   Ì¾Á°»ØÄ껲¾È¤Ç¡¢¤½¤Î̾Á°¤¬Ê£¿ô¤Î¼°½¸¹ç¤Ç¿½ÅÄêµÁ¤µ¤ì¤Æ¤¤¤ë¾ì¹ç¤Ë¤Ï¡¢
   ÈÖ¹æ¤ÎÂ礭¤¤¼°½¸¹ç¤«¤éÍ¥ÀèŪ¤Ë»²¾È¤µ¤ì¤ë¡£
 
   ¥Í¥¹¥È¥ì¥Ù¥ëÉÕ¤­¸åÊý»²¾È
 
-    ¤³¤Îµ¡Ç½¤Ï¸½ºß¡¢Ruby 1.9¤Ç¤Ï̵¸ú¤Ë¤·¤Æ¤¤¤ë¡£
+    level: 0, 1, 2, ...
+
+    \k<n+level>     (n >= 1)
+    \k<n-level>     (n >= 1)
+    \k'n+level'     (n >= 1)
+    \k'n-level'     (n >= 1)
 
-    \k<name+n>     n: 0, 1, 2, ...
-    \k<name-n>     n: 0, 1, 2, ...
+    \k<name+level>
+    \k<name-level>
+    \k'name+level'
+    \k'name-level'
 
     ¸åÊý»²¾È¤Î°ÌÃÖ¤«¤éÁêÂÐŪ¤ÊÉôʬ¼°¸Æ½Ð¤·¥Í¥¹¥È¥ì¥Ù¥ë¤ò»ØÄꤷ¤Æ¡¢¤½¤Î¥ì¥Ù¥ë¤Ç¤Î
     Êá³ÍÃͤò»²¾È¤¹¤ë¡£
 9. Éôʬ¼°¸Æ½Ð¤· ("ÅÄÃæÅ¯¥¹¥Ú¥·¥ã¥ë")
 
   \g<name>    Ì¾Á°»ØÄê¸Æ½Ð¤·
-  \g<n>       ÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1)
+  \g'name'    Ì¾Á°»ØÄê¸Æ½Ð¤·
+  \g<n>       ÈÖ¹æ»ØÄê¸Æ½Ð¤·    (n >= 1)
+  \g'n'       ÈÖ¹æ»ØÄê¸Æ½Ð¤·    (n >= 1)
+  \g<-n>      ÁêÂÐÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1)
+  \g'-n'      ÁêÂÐÈÖ¹æ»ØÄê¸Æ½Ð¤· (n >= 1)
 
   ¢¨ ºÇº¸°ÌÃ֤ǤκƵ¢¸Æ½Ð¤·¤Ï¶Ø»ß¤µ¤ì¤ë¡£
      Îã. (?<name>a|\g<name>b)   => error
   ¤³¤ì¤é¤Î¿¶Éñ¤Î°ÕÌ£¤Ï¡¢
   Ì¾Á°ÉÕ¤­Êá³Í¤È̾Á°Ìµ¤·Êá³Í¤òƱ»þ¤Ë»ÈÍѤ¹¤ëɬÁ³À­¤Î¤¢¤ë¾ìÌ̤Ͼ¯¤Ê¤¤¤Ç¤¢¤í¤¦
   ¤È¤¤¤¦Íýͳ¤«¤é¹Í¤¨¤é¤ì¤¿¤â¤Î¤Ç¤¢¤ë¡£
-  ¤³¤ì¤é¤Î¥ª¥×¥·¥ç¥ó¤Ë¤Ä¤¤¤Æ¤Ï¡¢Ruby¤Ç¤Ï¸½ºß¼ÂÁõ¤µ¤ì¤Æ¤¤¤Ê¤¤¡£
 
 
 -----------------------------
 Êäµ­ 2. ÆÈ¼«³ÈÄ¥µ¡Ç½
 
    + 16¿Ê¿ô¿ô»ú¡¢Èó16¿Ê¿ô»ú  \h, \H
-   + Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç      (?<name>...)
+   + Ì¾Á°ÉÕ¤­Êá³Í¼°½¸¹ç      (?<name>...), (?'name'...)
    + Ì¾Á°»ØÄê¸åÊý»²¾È        \k<name>
    + Éôʬ¼°¸Æ½Ð¤·            \g<name>, \g<group-num>
 
 
 Êäµ­ 3. Perl 5.8.0¤ÈÈæ³Ó¤·¤ÆÂ¸ºß¤·¤Ê¤¤µ¡Ç½
 
-   + [:word:]
    + \N{name}
    + \l,\u,\L,\U, \X, \C
    + (?{code})
    * \Q...\E
      Ã¢¤·ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú
 
-   * \p{property}, \P{property}
-     Ã¢¤·ONIG_SYNTAX_PERL¤ÈONIG_SYNTAX_JAVA¤Ç¤ÏÍ­¸ú
-     Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower,
-     Print, Punct, Space, Upper, XDigit, ASCII¤¬»ØÄê¤Ç¤­¤ë¡£
-
-     ÆÃÀ­Ì¾¤ÎÁ°¤Ë 'Is'Á°ÃÖ»ì¤ò»ÈÍѤ¹¤ë¤³¤È¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ß
-     µö¤µ¤ì¤Æ¤¤¤ë¡£
-     ex. \p{IsXDigit}.
 
-     ÆÃÀ­¤ÎÈÝÄê±é»»»Ò¤Ï¡¢ONIG_SYNTAX_PERL¤Ç¤Î¤ßµö¤µ¤ì¤Æ¤¤¤ë¡£
-     \p{^...}, \P{^...}
-
-
-Êäµ­ 4. Ruby¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤
+Êäµ­ 4. Ruby 1.8 ¤ÎÆüËܸ첽 GNU regex(version 0.12)¤È¤Î°ã¤¤
 
+   + Ê¸»úPropertyµ¡Ç½Äɲà(\p{property}, \P{Property})
    + 16¿Ê¿ô»ú¥¿¥¤¥×Äɲà(\h, \H)
    + Ìá¤êÆÉ¤ßµ¡Ç½¤òÄɲÃ
    + ¶¯Íߤʷ«¤êÊÖ¤·»ØÄê»Ò¤òÄɲà(?+, *+, ++)
 
 Êäµ­ 6. ÌäÂêÅÀ
 
-   + UTF-8¤Ç¡¢¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£
+   + ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°¥Ð¥¤¥ÈÃͤ¬Å¬Àµ¤Ê²Á¤«¤É¤¦¤«¤Î¥Á¥§¥Ã¥¯¤Ï¹Ô¤Ê¤Ã¤Æ¤¤¤Ê¤¤¡£
+
+     Îã: UTF-8
 
      * ÀèÆ¬¥Ð¥¤¥È¤È¤·¤ÆÉÔÀµ¤Ê¥Ð¥¤¥È¤ò°ìʸ»ú¤È¤ß¤Ê¤¹
        /./u =~ "\xa3"
 
      * ÉÔ´°Á´¤Ê¥Ð¥¤¥È¥·¡¼¥±¥ó¥¹¤Î¥Á¥§¥Ã¥¯¤ò¤·¤Ê¤¤
-      /\w+/ =~ "a\xf3\x8ec"
+      /\w+/u =~ "a\xf3\x8ec"
 
      ¤³¤ì¤òÄ´¤Ù¤ë¤³¤È¤Ï²Äǽ¤Ç¤Ï¤¢¤ë¤¬¡¢ÃÙ¤¯¤Ê¤ë¤Î¤Ç¹Ô¤Ê¤ï¤Ê¤¤¡£
 
+     Ê¸»úÎó¤È¤·¤Æ¡¢¤½¤Î¤è¤¦¤Ê¥Ð¥¤¥ÈÎó¤ò»ØÄꤷ¤¿¾ì¹ç¤Îưºî¤ÏÊݾڤ·¤Ê¤¤¡£
+
 ½ª¤ê
index 64be21d7fff9816a055804517779b789e05af34d..c2715f4e0d2584dd472dbd5d44527c5a85f358bb 100644 (file)
@@ -2,7 +2,7 @@
   ascii.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -43,23 +43,14 @@ OnigEncodingType OnigEncodingASCII = {
   "US-ASCII",  /* name */
   1,           /* max byte length */
   1,           /* min byte length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  onigenc_ascii_mbc_to_normalize,
-  onigenc_ascii_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  onigenc_ascii_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   ascii_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index 86792666a4b0776c973f663941f23d6a32c93240..ca1e01b46308312469e515153abf280fa9cdab59 100644 (file)
@@ -2,7 +2,7 @@
   big5.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,18 +67,21 @@ big5_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-big5_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                      UChar* lower)
+big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                   UChar* lower)
 {
-  return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_BIG5, flag,
-                                      pp, end, lower);
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_BIG5, flag,
+                                   pp, end, lower);
 }
 
+#if 0
 static int
-big5_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+big5_is_mbc_ambiguous(OnigCaseFoldType flag,
+                     const UChar** pp, const UChar* end)
 {
   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
 }
+#endif
 
 static int
 big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -125,14 +128,14 @@ big5_left_adjust_char_head(const UChar* start, const UChar* s)
       }
     } 
   }
-  len = enc_len(ONIG_ENCODING_BIG5, p);
+  len = enclen(ONIG_ENCODING_BIG5, p);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
 }
 
 static int
-big5_is_allowed_reverse_match(const UChar* s, const UChar* end)
+big5_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 {
   const UChar c = *s;
 
@@ -144,23 +147,14 @@ OnigEncodingType OnigEncodingBIG5 = {
   "Big5",     /* name */
   2,          /* max enc length */
   1,          /* min enc length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   big5_mbc_to_code,
   onigenc_mb2_code_to_mbclen,
   big5_code_to_mbc,
-  big5_mbc_to_normalize,
-  big5_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  big5_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   big5_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   big5_left_adjust_char_head,
diff --git a/ext/mbstring/oniguruma/enc/cp1251.c b/ext/mbstring/oniguruma/enc/cp1251.c
new file mode 100644 (file)
index 0000000..63e58d2
--- /dev/null
@@ -0,0 +1,200 @@
+/**********************************************************************
+  cp1251.c -  Oniguruma (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2006-2007  Byte      <byte AT mail DOT kna DOT ru>
+ *                          K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "regenc.h"
+/* Table-lookup helpers: both macros index a 256-entry table directly, so the argument must be in 0..255. */
+#define ENC_CP1251_TO_LOWER_CASE(c) EncCP1251_ToLowerCaseTable[c]
+#define ENC_IS_CP1251_CTYPE(code,ctype) \
+  ((EncCP1251_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)  /* 1 iff the bit CTYPE_TO_BIT(ctype) is set for code */
+/* Byte-to-byte lowercase map used by cp1251_mbc_case_fold; most entries map to themselves (octal literals, index == byte value). */
+static const UChar EncCP1251_ToLowerCaseTable[256] = {
+  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',  /* from here: \101-\132 map to \141-\172 */
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+  '\220', '\203', '\202', '\203', '\204', '\205', '\206', '\207',  /* \200-\277: only scattered entries are remapped */
+  '\210', '\211', '\232', '\213', '\234', '\235', '\236', '\237',
+  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+  '\240', '\242', '\242', '\274', '\244', '\264', '\246', '\247',
+  '\270', '\251', '\272', '\253', '\254', '\255', '\256', '\277',
+  '\260', '\261', '\263', '\263', '\264', '\265', '\266', '\267',
+  '\270', '\271', '\272', '\273', '\274', '\276', '\276', '\277',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',  /* from here: \300-\337 map to \340-\377 */
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
+};
+/* Per-byte ctype bitmasks read through ENC_IS_CP1251_CTYPE; the bit layout is whatever CTYPE_TO_BIT (not defined in this file) produces. */
+static const unsigned short EncCP1251_CtypeTable[256] = {
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x428c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
+  0x34a2, 0x34a2, 0x01a0, 0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
+  0x0000, 0x01a0, 0x34a2, 0x01a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x30e2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x01a0,
+  0x0008, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x0280, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0,
+  0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x01a0, 0x01a0, 0x01a0, 0x34a2,
+  0x01a0, 0x01a0, 0x34a2, 0x30e2, 0x30e2, 0x31e2, 0x01a0, 0x01a0,
+  0x30e2, 0x0000, 0x30e2, 0x01a0, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+/* Case-fold one CP1251 byte: stores its table-lowered form in lower[0], advances *pp by one byte and returns 1. */
+static int
+cp1251_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
+{
+  const UChar* p = *pp;  /* end is never consulted, so the caller must ensure *pp is readable */
+
+  *lower = ENC_CP1251_TO_LOWER_CASE(*p);
+  (*pp)++;               /* consume the single input byte */
+  return 1;              /* one byte was written to lower */
+}
+/* Test whether code has the given ctype according to EncCP1251_CtypeTable; returns FALSE for any code >= 256. */
+static int
+cp1251_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (code < 256)  /* guard: the ctype table has exactly 256 entries */
+    return ENC_IS_CP1251_CTYPE(code, ctype);
+  else
+    return FALSE;
+}
+/* Case pairs passed to the *_with_map helpers; in each entry the first value is what EncCP1251_ToLowerCaseTable yields for the second. */
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xb8, 0xa8 },
+/* NOTE(review): the lowercase table also remaps 0x80, 0x81, 0x8a, 0x8c-0x8f, 0xa1, 0xa3, 0xa5, 0xaa, 0xaf, 0xb2, 0xbd, yet no pair for them is listed here -- confirm this is intended. */
+  { 0xe0, 0xc0 },
+  { 0xe1, 0xc1 },
+  { 0xe2, 0xc2 },
+  { 0xe3, 0xc3 },
+  { 0xe4, 0xc4 },
+  { 0xe5, 0xc5 },
+  { 0xe6, 0xc6 },
+  { 0xe7, 0xc7 },
+  { 0xe8, 0xc8 },
+  { 0xe9, 0xc9 },
+  { 0xea, 0xca },
+  { 0xeb, 0xcb },
+  { 0xec, 0xcc },
+  { 0xed, 0xcd },
+  { 0xee, 0xce },
+  { 0xef, 0xcf },
+
+  { 0xf0, 0xd0 },
+  { 0xf1, 0xd1 },
+  { 0xf2, 0xd2 },
+  { 0xf3, 0xd3 },
+  { 0xf4, 0xd4 },
+  { 0xf5, 0xd5 },
+  { 0xf6, 0xd6 },
+  { 0xf7, 0xd7 },
+  { 0xf8, 0xd8 },
+  { 0xf9, 0xd9 },
+  { 0xfa, 0xda },
+  { 0xfb, 0xdb },
+  { 0xfc, 0xdc },
+  { 0xfd, 0xdd },
+  { 0xfe, 0xde },
+  { 0xff, 0xdf }
+};
+/* Forwards to onigenc_apply_all_case_fold_with_map with CaseFoldMap and its entry count; flag, f and arg are passed through untouched. */
+static int
+cp1251_apply_all_case_fold(OnigCaseFoldType flag,
+                              OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,  /* count, map; the 0 is interpreted by the helper -- TODO confirm its meaning */
+             flag, f, arg);
+}
+/* Forwards to onigenc_get_case_fold_codes_by_str_with_map with CaseFoldMap and its entry count; flag, p, end and items are passed through untouched. */
+static int
+cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,  /* count, map; the 0 is interpreted by the helper -- TODO confirm its meaning */
+            flag, p, end, items);
+}
+/* Encoding descriptor for CP1251 (min and max length both 1); only the case-fold and ctype slots are cp1251_* functions, the rest are shared onigenc_* helpers. */
+OnigEncodingType OnigEncodingCP1251 = {
+  onigenc_single_byte_mbc_enc_len,
+  "CP1251",      /* name */
+  1,             /* max enc length */
+  1,             /* min enc length */
+  onigenc_is_mbc_newline_0x0a,
+  onigenc_single_byte_mbc_to_code,
+  onigenc_single_byte_code_to_mbclen,
+  onigenc_single_byte_code_to_mbc,
+  cp1251_mbc_case_fold,
+  cp1251_apply_all_case_fold,
+  cp1251_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  cp1251_is_code_ctype,
+  onigenc_not_support_get_ctype_code_range,
+  onigenc_single_byte_left_adjust_char_head,
+  onigenc_always_true_is_allowed_reverse_match
+};
index 71c81ee9fe833e241be47d01e8d5e749d742c05e..f605297cc38e3f9c63895ae28dd43ddd59b91ea2 100644 (file)
@@ -2,7 +2,7 @@
   euc_jp.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  */
 
-#include "regenc.h"
+#include "regint.h"
 
 #define eucjp_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
 
@@ -51,18 +51,18 @@ static const int EncLen_EUCJP[] = {
 };
 
 static int
-eucjp_mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p)
 {
   return EncLen_EUCJP[*p];
 }
 
 static OnigCodePoint
-eucjp_mbc_to_code(const UChar* p, const UChar* end)
+mbc_to_code(const UChar* p, const UChar* end)
 {
   int c, i, len;
   OnigCodePoint n;
 
-  len = enc_len(ONIG_ENCODING_EUC_JP, p);
+  len = enclen(ONIG_ENCODING_EUC_JP, p);
   n = (OnigCodePoint )*p++;
   if (len == 1) return n;
 
@@ -75,17 +75,18 @@ eucjp_mbc_to_code(const UChar* p, const UChar* end)
 }
 
 static int
-eucjp_code_to_mbclen(OnigCodePoint code)
+code_to_mbclen(OnigCodePoint code)
 {
   if (ONIGENC_IS_CODE_ASCII(code)) return 1;
   else if ((code & 0xff0000) != 0) return 3;
   else if ((code &   0xff00) != 0) return 2;
-  else return 0;
+  else
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
 }
 
 #if 0
 static int
-eucjp_code_to_mbc_first(OnigCodePoint code)
+code_to_mbc_first(OnigCodePoint code)
 {
   int first;
 
@@ -103,7 +104,7 @@ eucjp_code_to_mbc_first(OnigCodePoint code)
 #endif
 
 static int
-eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
+code_to_mbc(OnigCodePoint code, UChar *buf)
 {
   UChar *p = buf;
 
@@ -112,66 +113,38 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf)
   *p++ = (UChar )(code & 0xff);
 
 #if 1
-  if (enc_len(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
-    return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+  if (enclen(ONIG_ENCODING_EUC_JP, buf) != (p - buf))
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
 #endif  
   return p - buf;
 }
 
 static int
-eucjp_mbc_to_normalize(OnigAmbigType flag,
-                      const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   int len;
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
-    }
-
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
     (*pp)++;
     return 1;
   }
   else {
-    len = enc_len(ONIG_ENCODING_EUC_JP, p);
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
-      }
+    int i;
+
+    len = enclen(ONIG_ENCODING_EUC_JP, p);
+    for (i = 0; i < len; i++) {
+      *lower++ = *p++;
     }
     (*pp) += len;
     return len; /* return byte length of converted char to lower */
   }
 }
 
-static int
-eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
-{
-  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_JP, flag, pp, end);
-}
-
-static int
-eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else {
-    if ((ctype & (ONIGENC_CTYPE_WORD |
-                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
-      return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
-    }
-  }
-
-  return FALSE;
-}
-
 static UChar*
-eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
+left_adjust_char_head(const UChar* start, const UChar* s)
 {
   /* In this encoding
      mb-trail bytes doesn't mix with single bytes.
@@ -183,14 +156,14 @@ eucjp_left_adjust_char_head(const UChar* start, const UChar* s)
   p = s;
 
   while (!eucjp_islead(*p) && p > start) p--;
-  len = enc_len(ONIG_ENCODING_EUC_JP, p);
+  len = enclen(ONIG_ENCODING_EUC_JP, p);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
 }
 
 static int
-eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
+is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 {
   const UChar c = *s;
   if (c <= 0x7e || c == 0x8e || c == 0x8f)
@@ -199,30 +172,114 @@ eucjp_is_allowed_reverse_match(const UChar* s, const UChar* end)
     return FALSE;
 }
 
+/* File-scope state for the named properties ("Hiragana", "Katakana") registered by init_property_list. */
+static int PropertyInited = 0;                /* set to 1 at the end of init_property_list */
+static const OnigCodePoint** PropertyList;    /* indexed by ctype - (ONIGENC_MAX_STD_CTYPE + 1) */
+static int PropertyListNum;                   /* bound checked before PropertyList is indexed */
+static int PropertyListSize;                  /* not referenced directly in the visible code; presumably used by PROPERTY_LIST_ADD_PROP -- TODO confirm */
+static hash_table_type* PropertyNameTable;    /* queried by name in property_name_to_ctype */
+/* Range table registered under the name "Hiragana": a leading 1 followed by one (low, high) code pair. */
+static const OnigCodePoint CR_Hiragana[] = {
+  1,
+  0xa4a1, 0xa4f3
+}; /* CR_Hiragana */
+/* Range table registered under the name "Katakana": a leading 3 followed by three (low, high) code pairs. */
+static const OnigCodePoint CR_Katakana[] = {
+  3,
+  0xa5a1, 0xa5f6,
+  0xaaa6, 0xaaaf,
+  0xaab1, 0xaadd
+}; /* CR_Katakana */
+/* Register the "Hiragana" and "Katakana" range tables, then flag the property list as initialised; returns r. */
+static int
+init_property_list(void)
+{
+  int r;  /* never assigned in the visible code; presumably set inside PROPERTY_LIST_ADD_PROP -- TODO confirm */
+
+  PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
+  PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
+  PropertyInited = 1;
+
+ end:  /* no visible goto targets this label; presumably the macro jumps here on failure -- TODO confirm */
+  return r;
+}
+/* Map the property name delimited by p and end to a ctype: PropertyNameTable is tried first, then onigenc_minimum_property_name_to_ctype. */
+static int
+property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+  hash_data_type ctype;
+
+  PROPERTY_LIST_INIT_CHECK;  /* macro defined elsewhere; presumably runs init_property_list on first use -- TODO confirm */
+
+  if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {  /* lookup returned 0: defer to the generic resolver */
+    return onigenc_minimum_property_name_to_ctype(enc, p, end);
+  }
+
+  return (int )ctype;  /* value stored by the table lookup */
+}
+/* Test code against ctype: standard ctypes are answered inline, larger ctype values select a registered property range table. */
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    if (code < 128)  /* codes below 128 go through the ASCII ctype macro */
+      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+    else {
+      if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {  /* for these ctypes: TRUE iff code_to_mbclen reports more than one byte */
+       return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
+      }
+    }
+  }
+  else {
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);  /* rebase to a PropertyList index */
+    if (ctype >= (unsigned int )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
+  }
+
+  return FALSE;  /* reached for code >= 128 with any other standard ctype */
+}
+/* Return the range table for a property ctype through *ranges; standard ctypes yield ONIG_NO_SUPPORT_CONFIG. */
+static int
+get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
+                    const OnigCodePoint* ranges[])
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    return ONIG_NO_SUPPORT_CONFIG;  /* *sb_out and *ranges are left untouched on this path */
+  }
+  else {
+    *sb_out = 0x80;  /* written before the bounds check, so it is set even when ONIGERR_TYPE_BUG is returned */
+
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);  /* rebase to a PropertyList index */
+    if (ctype >= (OnigCtype )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    *ranges = PropertyList[ctype];
+    return 0;  /* success: *ranges now points at the registered table */
+  }
+}
+
+
 OnigEncodingType OnigEncodingEUC_JP = {
-  eucjp_mbc_enc_len,
+  mbc_enc_len,
   "EUC-JP",   /* name */
   3,          /* max enc length */
   1,          /* min enc length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
-  eucjp_mbc_to_code,
-  eucjp_code_to_mbclen,
-  eucjp_code_to_mbc,
-  eucjp_mbc_to_normalize,
-  eucjp_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  eucjp_is_code_ctype,
-  onigenc_not_support_get_ctype_code_range,
-  eucjp_left_adjust_char_head,
-  eucjp_is_allowed_reverse_match
+  mbc_to_code,
+  code_to_mbclen,
+  code_to_mbc,
+  mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  property_name_to_ctype,
+  is_code_ctype,
+  get_ctype_code_range,
+  left_adjust_char_head,
+  is_allowed_reverse_match
 };
index 57bf801536cb1e8b814c7463954ed630a092d793..1beef09001c03a847710e41a430b530e84584c23 100644 (file)
@@ -2,7 +2,7 @@
   euc_kr.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,18 +67,21 @@ euckr_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-euckr_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                       UChar* lower)
+euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                    UChar* lower)
 {
-  return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_KR, flag,
-                                      pp, end, lower);
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_KR, flag,
+                                   pp, end, lower);
 }
 
+#if 0
 static int
-euckr_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
+                      const UChar** pp, const UChar* end)
 {
   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
 }
+#endif
 
 static int
 euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -101,14 +104,14 @@ euckr_left_adjust_char_head(const UChar* start, const UChar* s)
   p = s;
 
   while (!euckr_islead(*p) && p > start) p--;
-  len = enc_len(ONIG_ENCODING_EUC_KR, p);
+  len = enclen(ONIG_ENCODING_EUC_KR, p);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
 }
 
 static int
-euckr_is_allowed_reverse_match(const UChar* s, const UChar* end)
+euckr_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 {
   const UChar c = *s;
   if (c <= 0x7e) return TRUE;
@@ -120,23 +123,14 @@ OnigEncodingType OnigEncodingEUC_KR = {
   "EUC-KR",   /* name */
   2,          /* max enc length */
   1,          /* min enc length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   euckr_mbc_to_code,
   onigenc_mb2_code_to_mbclen,
   euckr_code_to_mbc,
-  euckr_mbc_to_normalize,
-  euckr_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  euckr_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   euckr_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   euckr_left_adjust_char_head,
@@ -149,23 +143,14 @@ OnigEncodingType OnigEncodingEUC_CN = {
   "EUC-CN",   /* name */
   2,          /* max enc length */
   1,          /* min enc length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   euckr_mbc_to_code,
   onigenc_mb2_code_to_mbclen,
   euckr_code_to_mbc,
-  euckr_mbc_to_normalize,
-  euckr_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  euckr_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   euckr_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   euckr_left_adjust_char_head,
index 6f396e75e6a588676d5e7f70d126ce1c43242cc9..2ddeb9318af09efa79e6dcd83d409960f62641cc 100644 (file)
@@ -2,7 +2,7 @@
   euc_tw.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -67,17 +67,11 @@ euctw_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-euctw_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                       UChar* lower)
+euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                    UChar* lower)
 {
-  return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_EUC_TW, flag,
-                                      pp, end, lower);
-}
-
-static int
-euctw_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
-{
-  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_TW, flag, pp, end);
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_EUC_TW, flag,
+                                   pp, end, lower);
 }
 
 static int
@@ -86,7 +80,7 @@ euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype)
   return onigenc_mb4_is_code_ctype(ONIG_ENCODING_EUC_TW, code, ctype);
 }
 
-#define euctw_islead(c)    (((c) < 0xa1 && (c) != 0x8e) || (c) == 0xff)
+#define euctw_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
 
 static UChar*
 euctw_left_adjust_char_head(const UChar* start, const UChar* s)
@@ -101,14 +95,14 @@ euctw_left_adjust_char_head(const UChar* start, const UChar* s)
   p = s;
 
   while (!euctw_islead(*p) && p > start) p--;
-  len = enc_len(ONIG_ENCODING_EUC_TW, p);
+  len = enclen(ONIG_ENCODING_EUC_TW, p);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
 }
 
 static int
-euctw_is_allowed_reverse_match(const UChar* s, const UChar* end)
+euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 {
   const UChar c = *s;
   if (c <= 0x7e) return TRUE;
@@ -120,23 +114,14 @@ OnigEncodingType OnigEncodingEUC_TW = {
   "EUC-TW",   /* name */
   4,          /* max enc length */
   1,          /* min enc length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   euctw_mbc_to_code,
   onigenc_mb4_code_to_mbclen,
   euctw_code_to_mbc,
-  euctw_mbc_to_normalize,
-  euctw_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  euctw_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   euctw_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   euctw_left_adjust_char_head,
index 01995ea094d5e508823941fa895fd76e9f2827a1..6bbd109eab81443a1ad0b39049435239995d7ab2 100644 (file)
@@ -2,8 +2,8 @@
   gb18030.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2005  KUBO Takehiro <kubo AT jiubao DOT org>
- *                     K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2005-2007  KUBO Takehiro <kubo AT jiubao DOT org>
+ *                          K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -88,18 +88,21 @@ gb18030_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-gb18030_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                       UChar* lower)
+gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
+                      UChar* lower)
 {
-  return onigenc_mbn_mbc_to_normalize(ONIG_ENCODING_GB18030, flag,
-                                      pp, end, lower);
+  return onigenc_mbn_mbc_case_fold(ONIG_ENCODING_GB18030, flag,
+                                   pp, end, lower);
 }
 
+#if 0
 static int
-gb18030_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
+                        const UChar** pp, const UChar* end)
 {
   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
 }
+#endif
 
 static int
 gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -467,7 +470,7 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
 }
 
 static int
-gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end)
+gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 {
   return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
 }
@@ -477,23 +480,14 @@ OnigEncodingType OnigEncodingGB18030 = {
   "GB18030",   /* name */
   4,          /* max enc length */
   1,          /* min enc length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   gb18030_mbc_to_code,
   onigenc_mb4_code_to_mbclen,
   gb18030_code_to_mbc,
-  gb18030_mbc_to_normalize,
-  gb18030_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  gb18030_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   gb18030_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   gb18030_left_adjust_char_head,
index 5646f26c102637e01bb3b7aae2e8e81d05d5778a..174b97f0267d45c4a48649e9019fa0e8dbc955c5 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_1.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "regenc.h"
 
 #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
-  ((EncISO_8859_1_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const unsigned short EncISO_8859_1_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
-  0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
+};
+
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
 };
 
 static int
-iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  const UChar* p = *pp;
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
+                          const OnigUChar* p, const OnigUChar* end,
+                          OnigCaseFoldCodeItem items[])
+{
+  if (0x41 <= *p && *p <= 0x5a) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
+    if (*p == 0x53 && end > p + 1
+       && (*(p+1) == 0x53 || *(p+1) == 0x73)) { /* SS */
+      items[1].byte_len = 2;
+      items[1].code_len = 1;
+      items[1].code[0] = (OnigCodePoint )0xdf;
+      return 2;
+    }
+    else
+      return 1;
   }
-  else {
-    *lower = *p;
+  else if (0x61 <= *p && *p <= 0x7a) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
+    if (*p == 0x73 && end > p + 1
+       && (*(p+1) == 0x73 || *(p+1) == 0x53)) { /* ss */
+      items[1].byte_len = 2;
+      items[1].code_len = 1;
+      items[1].code[0] = (OnigCodePoint )0xdf;
+      return 2;
+    }
+    else
+      return 1;
   }
-  (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  else if (0xc0 <= *p && *p <= 0xcf) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
+    return 1;
+  }
+  else if (0xd0 <= *p && *p <= 0xdf) {
+    if (*p == 0xdf) {
+      items[0].byte_len = 1;
+      items[0].code_len = 2;
+      items[0].code[0] = (OnigCodePoint )'s';
+      items[0].code[1] = (OnigCodePoint )'s';
+
+      items[1].byte_len = 1;
+      items[1].code_len = 2;
+      items[1].code[0] = (OnigCodePoint )'S';
+      items[1].code[1] = (OnigCodePoint )'S';
+
+      items[2].byte_len = 1;
+      items[2].code_len = 2;
+      items[2].code[0] = (OnigCodePoint )'s';
+      items[2].code[1] = (OnigCodePoint )'S';
+
+      items[3].byte_len = 1;
+      items[3].code_len = 2;
+      items[3].code[0] = (OnigCodePoint )'S';
+      items[3].code[1] = (OnigCodePoint )'s';
+
+      return 4;
+    }
+    else if (*p != 0xd7) {
+      items[0].byte_len = 1;
+      items[0].code_len = 1;
+      items[0].code[0] = (OnigCodePoint )(*p + 0x20);
+      return 1;
+    }
+  }
+  else if (0xe0 <= *p && *p <= 0xef) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
+    return 1;
+  }
+  else if (0xf0 <= *p && *p <= 0xfe) {
+    if (*p != 0xf7) {
+      items[0].byte_len = 1;
+      items[0].code_len = 1;
+      items[0].code[0] = (OnigCodePoint )(*p - 0x20);
+      return 1;
+    }
+  }
+
+  return 0;
 }
 
 static int
-iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
+             const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
+  }
+
+  *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
   (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_1_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-      if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
-        return FALSE;
-      else
-        return TRUE;
-    }
+  return 1;
+}
+
+#if 0
+static int
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                const UChar** pp, const UChar* end)
+{
+  int v;
+  const UChar* p = *pp;
 
-    return (v != 0 ? TRUE : FALSE);
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
   }
-  return FALSE;
+
+  (*pp)++;
+  v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+    if (*p >= 0xaa && *p <= 0xba)
+      return FALSE;
+    else
+      return TRUE;
+  }
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_1_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_1_CTYPE(code, ctype);
@@ -126,25 +257,15 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
   "ISO-8859-1",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_1_mbc_to_normalize,
-  iso_8859_1_is_mbc_ambiguous,
-  onigenc_iso_8859_1_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_1_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index 8081ef8010f98c72f1954038c4aecc5f82272221..e35c19d78f8304b47736710f1367c31354176413 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_10.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_10_TO_LOWER_CASE(c) EncISO_8859_10_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
-  ((EncISO_8859_10_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_10_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,88 +69,82 @@ static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_10_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
-  0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x10e2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
+  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
 };
 
 static int
-iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
   }
+
+  *lower = ENC_ISO_8859_10_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
-iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_10_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf is lower case letter, but can't convert. */
-      if (*p == 0xdf)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_10_CTYPE(code, ctype);
@@ -158,116 +152,71 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xa1, 0xb1 },
+ { 0xa2, 0xb2 },
+ { 0xa3, 0xb3 },
+ { 0xa4, 0xb4 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa8, 0xb8 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
+
 static int
-iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xb1 },
-    { 0xa2, 0xb2 },
-    { 0xa3, 0xb3 },
-    { 0xa4, 0xb4 },
-    { 0xa5, 0xb5 },
-    { 0xa6, 0xb6 },
-    { 0xa8, 0xb8 },
-    { 0xa9, 0xb9 },
-    { 0xaa, 0xba },
-    { 0xab, 0xbb },
-    { 0xac, 0xbc },
-    { 0xae, 0xbe },
-    { 0xaf, 0xbf },
-
-    { 0xb1, 0xa1 },
-    { 0xb2, 0xa2 },
-    { 0xb3, 0xa3 },
-    { 0xb4, 0xa4 },
-    { 0xb5, 0xa5 },
-    { 0xb6, 0xa6 },
-    { 0xb8, 0xa8 },
-    { 0xb9, 0xa9 },
-    { 0xba, 0xaa },
-    { 0xbb, 0xab },
-    { 0xbc, 0xac },
-    { 0xbe, 0xae },
-    { 0xbf, 0xaf },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd7, 0xf7 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf7, 0xd7 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_10 = {
@@ -275,25 +224,15 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
   "ISO-8859-10", /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_10_mbc_to_normalize,
-  iso_8859_10_is_mbc_ambiguous,
-  iso_8859_10_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_10_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index de9bb3b8257168595f2d9cb193ef3ce0e60a9a40..8a460a30475fc3e9f5fbe24d132f7f27d1a13512 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_11.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "regenc.h"
 
 #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
-  ((EncISO_8859_11_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_11_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const unsigned short EncISO_8859_11_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000
+  0x0284, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000
 };
 
 static int
-iso_8859_11_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_11_CTYPE(code, ctype);
@@ -81,24 +81,15 @@ OnigEncodingType OnigEncodingISO_8859_11 = {
   "ISO-8859-11",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  onigenc_ascii_mbc_to_normalize,
-  onigenc_ascii_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  iso_8859_11_is_code_ctype,
+  onigenc_ascii_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index 69316edfc351b45b1684e318e99a10fd6590b863..3670d92ea55600923a3603b7c148f77ae577f1ae 100644 (file)
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_13_TO_LOWER_CASE(c) EncISO_8859_13_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
-  ((EncISO_8859_13_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_13_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,84 +69,83 @@ static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_13_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
-  0x14a2, 0x00a0, 0x14a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x14a2,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x10e2, 0x00a0, 0x01a0,
-  0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x01a0
+  0x34a2, 0x00a0, 0x34a2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x34a2,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x01a0, 0x30e2, 0x00a0, 0x01a0,
+  0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x01a0
 };
 
 static int
-mbc_to_normalize(OnigAmbigType flag,
-                const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { /* 0xdf expands to the two bytes 's','s' when multi-char folding is requested */
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes were written to lower */
   }
+
+  *lower = ENC_ISO_8859_13_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1; /* one byte was written to lower */
 }
 
+#if 0 /* compiled out: is_mbc_ambiguous is no longer an entry of OnigEncodingISO_8859_13 */
 static int
-is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_13_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf, 0xb5 are lower case letter, but can't convert. */
-      if (*p == 0xdf || *p == 0xb5)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): assuming BIT_CTYPE_LOWER is a nonzero mask, '|' makes this always true and the final return unreachable; '&' was presumably intended */
+    /* 0xb5 is a lower case letter, but can't convert; 0xdf is no longer excluded here. */
+    if (*p == 0xb5)
+      return FALSE;
+    else
+      return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
 is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -157,85 +156,56 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = { /* byte pairs 0xc0-0xde <-> 0xe0-0xfe, each pair listed once; 0xd7/0xf7 and 0xdf/0xff have no entry */
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
+
 static int
-get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
- static const OnigPairAmbigCodes cc[] = {
-  { 0xc0, 0xe0 },
-  { 0xc1, 0xe1 },
-  { 0xc2, 0xe2 },
-  { 0xc3, 0xe3 },
-  { 0xc4, 0xe4 },
-  { 0xc5, 0xe5 },
-  { 0xc6, 0xe6 },
-  { 0xc7, 0xe7 },
-  { 0xc8, 0xe8 },
-  { 0xc9, 0xe9 },
-  { 0xca, 0xea },
-  { 0xcb, 0xeb },
-  { 0xcc, 0xec },
-  { 0xcd, 0xed },
-  { 0xce, 0xee },
-  { 0xcf, 0xef },
-
-  { 0xd0, 0xf0 },
-  { 0xd1, 0xf1 },
-  { 0xd2, 0xf2 },
-  { 0xd3, 0xf3 },
-  { 0xd4, 0xf4 },
-  { 0xd5, 0xf5 },
-  { 0xd6, 0xf6 },
-  { 0xd8, 0xf8 },
-  { 0xd9, 0xf9 },
-  { 0xda, 0xfa },
-  { 0xdb, 0xfb },
-  { 0xdc, 0xfc },
-  { 0xdd, 0xfd },
-  { 0xde, 0xfe },
-
-  { 0xe0, 0xc0 },
-  { 0xe1, 0xc1 },
-  { 0xe2, 0xc2 },
-  { 0xe3, 0xc3 },
-  { 0xe4, 0xc4 },
-  { 0xe5, 0xc5 },
-  { 0xe6, 0xc6 },
-  { 0xe7, 0xc7 },
-  { 0xe8, 0xc8 },
-  { 0xe9, 0xc9 },
-  { 0xea, 0xca },
-  { 0xeb, 0xcb },
-  { 0xec, 0xcc },
-  { 0xed, 0xcd },
-  { 0xee, 0xce },
-  { 0xef, 0xcf },
-
-  { 0xf0, 0xd0 },
-  { 0xf1, 0xd1 },
-  { 0xf2, 0xd2 },
-  { 0xf3, 0xd3 },
-  { 0xf4, 0xd4 },
-  { 0xf5, 0xd5 },
-  { 0xf6, 0xd6 },
-  { 0xf8, 0xd8 },
-  { 0xf9, 0xd9 },
-  { 0xfa, 0xda },
-  { 0xfb, 0xdb },
-  { 0xfc, 0xdc },
-  { 0xfd, 0xdd },
-  { 0xfe, 0xde }
- };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+   const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_13 = {
@@ -243,24 +213,14 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
   "ISO-8859-13",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  mbc_to_normalize,
-  is_mbc_ambiguous,
-  get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index 44638cf13a50f5e8711e5786ccf13e14b739aad5..3596d4479aa7fd70ebc79398d436fae2151b3c76 100644 (file)
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_14_TO_LOWER_CASE(c) EncISO_8859_14_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
-  ((EncISO_8859_14_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_14_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,84 +69,80 @@ static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_14_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x10e2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x00a0,
-  0x14a2, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x14a2,
-  0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x00a0, 0x14a2,
-  0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x0284, 0x34a2, 0x30e2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x00a0,
+  0x34a2, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x34a2,
+  0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x00a0, 0x34a2,
+  0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
 };
 
 static int
-mbc_to_normalize(OnigAmbigType flag,
-                const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { /* 0xdf expands to the two bytes 's','s' when multi-char folding is requested */
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes were written to lower */
   }
+
+  *lower = ENC_ISO_8859_14_TO_LOWER_CASE(*p);
   (*pp)++;
   return 1; /* return byte length of converted char to lower */
 }
 
+#if 0 /* compiled out: is_mbc_ambiguous is no longer an entry of OnigEncodingISO_8859_14 */
 static int
-is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_14_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf is lower case letter, but can't convert. */
-      if (*p == 0xdf)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): assuming BIT_CTYPE_LOWER is a nonzero mask, '|' makes this always true and the final return unreachable; '&' was presumably intended */
+    return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
 is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -157,115 +153,72 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = { /* each pair listed once: irregular pairs within 0xa1-0xbf, {0xaf, 0xff}, then 0xc0-0xde <-> 0xe0-0xfe (0xd7/0xf7 included) */
+ { 0xa1, 0xa2 },
+ { 0xa4, 0xa5 },
+ { 0xa6, 0xab },
+ { 0xa8, 0xb8 },
+ { 0xaa, 0xba },
+ { 0xac, 0xbc },
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xb1 },
+ { 0xb2, 0xb3 },
+ { 0xb4, 0xb5 },
+ { 0xb7, 0xb9 },
+ { 0xbb, 0xbf },
+ { 0xbd, 0xbe },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
+
 static int
-get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-   { 0xa1, 0xa2 },
-   { 0xa2, 0xa1 },
-   { 0xa4, 0xa5 },
-   { 0xa5, 0xa4 },
-   { 0xa6, 0xab },
-   { 0xa8, 0xb8 },
-   { 0xaa, 0xba },
-   { 0xab, 0xa6 },
-   { 0xac, 0xbc },
-   { 0xaf, 0xff },
-
-   { 0xb0, 0xb1 },
-   { 0xb1, 0xb0 },
-   { 0xb2, 0xb3 },
-   { 0xb3, 0xb2 },
-   { 0xb4, 0xb5 },
-   { 0xb5, 0xb4 },
-   { 0xb7, 0xb9 },
-   { 0xb8, 0xa8 },
-   { 0xb9, 0xb7 },
-   { 0xba, 0xaa },
-   { 0xbb, 0xbf },
-   { 0xbc, 0xac },
-   { 0xbd, 0xbe },
-   { 0xbe, 0xbd },
-   { 0xbf, 0xbb },
-
-   { 0xc0, 0xe0 },
-   { 0xc1, 0xe1 },
-   { 0xc2, 0xe2 },
-   { 0xc3, 0xe3 },
-   { 0xc4, 0xe4 },
-   { 0xc5, 0xe5 },
-   { 0xc6, 0xe6 },
-   { 0xc7, 0xe7 },
-   { 0xc8, 0xe8 },
-   { 0xc9, 0xe9 },
-   { 0xca, 0xea },
-   { 0xcb, 0xeb },
-   { 0xcc, 0xec },
-   { 0xcd, 0xed },
-   { 0xce, 0xee },
-   { 0xcf, 0xef },
-
-   { 0xd0, 0xf0 },
-   { 0xd1, 0xf1 },
-   { 0xd2, 0xf2 },
-   { 0xd3, 0xf3 },
-   { 0xd4, 0xf4 },
-   { 0xd5, 0xf5 },
-   { 0xd6, 0xf6 },
-   { 0xd7, 0xf7 },
-   { 0xd8, 0xf8 },
-   { 0xd9, 0xf9 },
-   { 0xda, 0xfa },
-   { 0xdb, 0xfb },
-   { 0xdc, 0xfc },
-   { 0xdd, 0xfd },
-   { 0xde, 0xfe },
-
-   { 0xe0, 0xc0 },
-   { 0xe1, 0xc1 },
-   { 0xe2, 0xc2 },
-   { 0xe3, 0xc3 },
-   { 0xe4, 0xc4 },
-   { 0xe5, 0xc5 },
-   { 0xe6, 0xc6 },
-   { 0xe7, 0xc7 },
-   { 0xe8, 0xc8 },
-   { 0xe9, 0xc9 },
-   { 0xea, 0xca },
-   { 0xeb, 0xcb },
-   { 0xec, 0xcc },
-   { 0xed, 0xcd },
-   { 0xee, 0xce },
-   { 0xef, 0xcf },
-
-   { 0xf0, 0xd0 },
-   { 0xf1, 0xd1 },
-   { 0xf2, 0xd2 },
-   { 0xf3, 0xd3 },
-   { 0xf4, 0xd4 },
-   { 0xf5, 0xd5 },
-   { 0xf6, 0xd6 },
-   { 0xf7, 0xd7 },
-   { 0xf8, 0xd8 },
-   { 0xf9, 0xd9 },
-   { 0xfa, 0xda },
-   { 0xfb, 0xdb },
-   { 0xfc, 0xdc },
-   { 0xfd, 0xdd },
-   { 0xfe, 0xde },
-   { 0xff, 0xaf }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_14 = {
@@ -273,24 +226,14 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
   "ISO-8859-14",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  mbc_to_normalize,
-  is_mbc_ambiguous,
-  get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index f643b895df2d050192a158243d067e67dbc9c61d..08492fb4d9e99b80d2bca1a7982880ff2f4309db 100644 (file)
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_15_TO_LOWER_CASE(c) EncISO_8859_15_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
-  ((EncISO_8859_15_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_15_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,84 +69,84 @@ static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_15_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0,
-  0x10e2, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x14a2, 0x10e2, 0x00a0, 0x01a0,
-  0x10e2, 0x10a0, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0,
+  0x30e2, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x34a2, 0x30e2, 0x00a0, 0x01a0,
+  0x30e2, 0x10a0, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
 };
 
 static int
-mbc_to_normalize(OnigAmbigType flag,
-                const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { /* 0xdf expands to the two bytes 's','s' when multi-char folding is requested */
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2; /* two bytes were written to lower */
   }
+
+  *lower = ENC_ISO_8859_15_TO_LOWER_CASE(*p);
   (*pp)++;
   return 1; /* return byte length of converted char to lower */
 }
 
+#if 0 /* compiled out: is_mbc_ambiguous is no longer an entry of OnigEncodingISO_8859_15 */
 static int
-is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_15_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf etc.. are lower case letter, but can't convert. */
-      if (*p == 0xdf || *p == 0xaa || *p == 0xb5 || *p == 0xba)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) { /* NOTE(review): assuming BIT_CTYPE_LOWER is a nonzero mask, '|' makes this always true and the final return unreachable; '&' was presumably intended */
+    /* 0xaa, 0xb5 and 0xba are lower case letters, but can't convert; 0xdf is no longer excluded here. */
+    if (*p == 0xaa || *p == 0xb5 || *p == 0xba)
+      return FALSE;
+    else
+      return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
 is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -157,96 +157,62 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = { /* each pair listed once: four irregular pairs, then 0xc0-0xde <-> 0xe0-0xfe; 0xd7/0xf7 have no entry */
+ { 0xa6, 0xa8 },
+
+ { 0xb4, 0xb8 },
+ { 0xbc, 0xbd },
+ { 0xbe, 0xff },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
+
 static int
-get_all_pair_ambig_codes(OnigAmbigType flag,
-                        const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa6, 0xa8 },
-    { 0xa8, 0xa6 },
-
-    { 0xb4, 0xb8 },
-    { 0xb8, 0xb4 },
-    { 0xbc, 0xbd },
-    { 0xbd, 0xbc },
-    { 0xbe, 0xff },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde },
-    { 0xff, 0xbe }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_15 = {
@@ -254,24 +220,14 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
   "ISO-8859-15",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  mbc_to_normalize,
-  is_mbc_ambiguous,
-  get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index 921ae36d9d22a2061dea4ab5058d995591eb00e6..8b39c58a6b43830dc370507e36ce1bb9e66779a8 100644 (file)
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_16_TO_LOWER_CASE(c) EncISO_8859_16_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
-  ((EncISO_8859_16_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_16_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,84 +69,79 @@ static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_16_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x01a0, 0x14a2, 0x00a0,
-  0x10e2, 0x00a0, 0x14a2, 0x01a0, 0x14a2, 0x01a0, 0x10e2, 0x14a2,
-  0x00a0, 0x00a0, 0x14a2, 0x10e2, 0x14a2, 0x01a0, 0x00a0, 0x01a0,
-  0x10e2, 0x10e2, 0x10e2, 0x01a0, 0x14a2, 0x10e2, 0x14a2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x01a0, 0x34a2, 0x00a0,
+  0x30e2, 0x00a0, 0x34a2, 0x01a0, 0x34a2, 0x01a0, 0x30e2, 0x34a2,
+  0x00a0, 0x00a0, 0x34a2, 0x30e2, 0x34a2, 0x01a0, 0x00a0, 0x01a0,
+  0x30e2, 0x30e2, 0x30e2, 0x01a0, 0x34a2, 0x30e2, 0x34a2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
 };
 
 static int
-mbc_to_normalize(OnigAmbigType flag,
-                const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
   }
+
+  *lower = ENC_ISO_8859_16_TO_LOWER_CASE(*p);
   (*pp)++;
   return 1; /* return byte length of converted char to lower */
 }
 
+#if 0
 static int
-is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_16_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf is lower case letter, but can't convert. */
-      if (*p == 0xdf)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
 is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -157,109 +152,69 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xa1, 0xa2 },
+ { 0xa3, 0xb3 },
+ { 0xa6, 0xa8 },
+ { 0xaa, 0xba },
+ { 0xac, 0xae },
+ { 0xaf, 0xbf },
+
+ { 0xb2, 0xb9 },
+ { 0xb4, 0xb8 },
+ { 0xbc, 0xbd },
+ { 0xbe, 0xff },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
+
 static int
-get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-   { 0xa1, 0xa2 },
-   { 0xa2, 0xa1 },
-   { 0xa3, 0xb3 },
-   { 0xa6, 0xa8 },
-   { 0xa8, 0xa6 },
-   { 0xaa, 0xba },
-   { 0xac, 0xae },
-   { 0xae, 0xac },
-   { 0xaf, 0xbf },
-
-   { 0xb2, 0xb9 },
-   { 0xb3, 0xa3 },
-   { 0xb4, 0xb8 },
-   { 0xb8, 0xb4 },
-   { 0xb9, 0xb2 },
-   { 0xba, 0xaa },
-   { 0xbc, 0xbd },
-   { 0xbd, 0xbc },
-   { 0xbe, 0xff },
-   { 0xbf, 0xaf },
-
-   { 0xc0, 0xe0 },
-   { 0xc1, 0xe1 },
-   { 0xc2, 0xe2 },
-   { 0xc3, 0xe3 },
-   { 0xc4, 0xe4 },
-   { 0xc5, 0xe5 },
-   { 0xc6, 0xe6 },
-   { 0xc7, 0xe7 },
-   { 0xc8, 0xe8 },
-   { 0xc9, 0xe9 },
-   { 0xca, 0xea },
-   { 0xcb, 0xeb },
-   { 0xcc, 0xec },
-   { 0xcd, 0xed },
-   { 0xce, 0xee },
-   { 0xcf, 0xef },
-
-   { 0xd0, 0xf0 },
-   { 0xd1, 0xf1 },
-   { 0xd2, 0xf2 },
-   { 0xd3, 0xf3 },
-   { 0xd4, 0xf4 },
-   { 0xd5, 0xf5 },
-   { 0xd6, 0xf6 },
-   { 0xd7, 0xf7 },
-   { 0xd8, 0xf8 },
-   { 0xd9, 0xf9 },
-   { 0xda, 0xfa },
-   { 0xdb, 0xfb },
-   { 0xdc, 0xfc },
-   { 0xdd, 0xfd },
-   { 0xde, 0xfe },
-
-   { 0xe0, 0xc0 },
-   { 0xe1, 0xc1 },
-   { 0xe2, 0xc2 },
-   { 0xe3, 0xc3 },
-   { 0xe4, 0xc4 },
-   { 0xe5, 0xc5 },
-   { 0xe6, 0xc6 },
-   { 0xe7, 0xc7 },
-   { 0xe8, 0xc8 },
-   { 0xe9, 0xc9 },
-   { 0xea, 0xca },
-   { 0xeb, 0xcb },
-   { 0xec, 0xcc },
-   { 0xed, 0xcd },
-   { 0xee, 0xce },
-   { 0xef, 0xcf },
-
-   { 0xf0, 0xd0 },
-   { 0xf1, 0xd1 },
-   { 0xf2, 0xd2 },
-   { 0xf3, 0xd3 },
-   { 0xf4, 0xd4 },
-   { 0xf5, 0xd5 },
-   { 0xf6, 0xd6 },
-   { 0xf7, 0xd7 },
-   { 0xf8, 0xd8 },
-   { 0xf9, 0xd9 },
-   { 0xfa, 0xda },
-   { 0xfb, 0xdb },
-   { 0xfc, 0xdc },
-   { 0xfd, 0xdd },
-   { 0xfe, 0xde },
-   { 0xff, 0xbe }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_16 = {
@@ -267,24 +222,14 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
   "ISO-8859-16",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  mbc_to_normalize,
-  is_mbc_ambiguous,
-  get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index f8cb3756f2849a630d26e4ac055e22d7f7d43a03..80b93ba1ba3e1aca8b80579c4bb60d08e43635f8 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_2.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
-  ((EncISO_8859_2_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,192 +69,145 @@ static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_2_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x00a0, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
-  0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
-  0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
-  0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+  0x0284, 0x34a2, 0x00a0, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
+  0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
+  0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
+  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
 };
 
 static int
-iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
   }
+
+  *lower = ENC_ISO_8859_2_TO_LOWER_CASE(*p);
   (*pp)++;
   return 1; /* return byte length of converted char to lower */
 }
 
+#if 0
 static int
-iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_2_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf is lower case letter, but can't convert. */
-      if (*p == 0xdf)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
+
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xa1, 0xb1 },
+ { 0xa3, 0xb3 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
 
 static int
-iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xb1 },
-    { 0xa3, 0xb3 },
-    { 0xa5, 0xb5 },
-    { 0xa6, 0xb6 },
-    { 0xa9, 0xb9 },
-    { 0xaa, 0xba },
-    { 0xab, 0xbb },
-    { 0xac, 0xbc },
-    { 0xae, 0xbe },
-    { 0xaf, 0xbf },
-
-    { 0xb1, 0xa1 },
-    { 0xb3, 0xa3 },
-    { 0xb5, 0xa5 },
-    { 0xb6, 0xa6 },
-    { 0xb9, 0xa9 },
-    { 0xba, 0xaa },
-    { 0xbb, 0xab },
-    { 0xbc, 0xac },
-    { 0xbe, 0xae },
-    { 0xbf, 0xaf },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 static int
-iso_8859_2_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_2_CTYPE(code, ctype);
@@ -267,25 +220,15 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
   "ISO-8859-2",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_2_mbc_to_normalize,
-  iso_8859_2_is_mbc_ambiguous,
-  iso_8859_2_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_2_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index e62d20de7b14a97c3af4950644481dffe3e59aad..fd1168c3816b85cfbd8d172ba7bbd91739e10c3a 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_3.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_3_TO_LOWER_CASE(c) EncISO_8859_3_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
-  ((EncISO_8859_3_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_3_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,88 +69,86 @@ static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_3_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x14a2, 0x00a0,
-  0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x0000, 0x14a2,
-  0x00a0, 0x10e2, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x10e2, 0x01a0,
-  0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x11a0, 0x0000, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x0000, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+  0x0284, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x0000, 0x34a2, 0x00a0,
+  0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x0000, 0x34a2,
+  0x00a0, 0x30e2, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x30e2, 0x01a0,
+  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x11a0, 0x0000, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x0000, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
 };
 
 static int
-iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
+             const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
   }
+
+  *lower = ENC_ISO_8859_3_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
-iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_3_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-      if (*p == 0xdf || *p == 0xb5)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+    if (*p == 0xb5)
+      return FALSE;
+    else
+      return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_3_CTYPE(code, ctype);
@@ -158,97 +156,63 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
-static int
-iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
-{
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xb1 },
-    { 0xa6, 0xb6 },
-    { 0xa9, 0xb9 },
-    { 0xaa, 0xba },
-    { 0xab, 0xbb },
-    { 0xac, 0xbc },
-    { 0xaf, 0xbf },
-    { 0xb1, 0xa1 },
-    { 0xb6, 0xa6 },
-    { 0xb9, 0xa9 },
-    { 0xba, 0xaa },
-    { 0xbb, 0xab },
-    { 0xbc, 0xac },
-    { 0xbf, 0xaf },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xa1, 0xb1 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xaf, 0xbf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
 
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
 
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
-  };
+static int
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+   const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_3 = {
@@ -256,25 +220,15 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
   "ISO-8859-3",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_3_mbc_to_normalize,
-  iso_8859_3_is_mbc_ambiguous,
-  iso_8859_3_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_3_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index dd6bd7dfe35155a98c664bb0c544e54e6b8dc99f..c124f5653d7a957adb0319dcd51ba57d1ce12b25 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_4.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_4_TO_LOWER_CASE(c) EncISO_8859_4_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
-  ((EncISO_8859_4_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_4_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,88 +69,85 @@ static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_4_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x10e2, 0x14a2, 0x00a0, 0x14a2, 0x14a2, 0x00a0,
-  0x00a0, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x00a0,
-  0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x00a0, 0x10e2, 0x10e2, 0x00a0,
-  0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x14a2, 0x10e2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0
+  0x0284, 0x34a2, 0x30e2, 0x34a2, 0x00a0, 0x34a2, 0x34a2, 0x00a0,
+  0x00a0, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x00a0,
+  0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x00a0, 0x30e2, 0x30e2, 0x00a0,
+  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x34a2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0
 };
 
 static int
-iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
   }
+
+  *lower = ENC_ISO_8859_4_TO_LOWER_CASE(*p);
   (*pp)++;
   return 1; /* return byte length of converted char to lower */
 }
 
+#if 0
 static int
-iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_4_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-      if (*p == 0xdf || *p == 0xa2)
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    if (*p == 0xa2)
+      return FALSE;
+    else
+      return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_4_CTYPE(code, ctype);
@@ -158,106 +155,66 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xa1, 0xb1 },
+ { 0xa3, 0xb3 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
+};
+
 static int
-iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xb1 },
-    { 0xa3, 0xb3 },
-    { 0xa5, 0xb5 },
-    { 0xa6, 0xb6 },
-    { 0xa9, 0xb9 },
-    { 0xaa, 0xba },
-    { 0xab, 0xbb },
-    { 0xac, 0xbc },
-    { 0xae, 0xbe },
-
-    { 0xb1, 0xa1 },
-    { 0xb3, 0xa3 },
-    { 0xb5, 0xa5 },
-    { 0xb6, 0xa6 },
-    { 0xb9, 0xa9 },
-    { 0xba, 0xaa },
-    { 0xbb, 0xab },
-    { 0xbc, 0xac },
-    { 0xbe, 0xae },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_4 = {
@@ -265,25 +222,15 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
   "ISO-8859-4",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_4_mbc_to_normalize,
-  iso_8859_4_is_mbc_ambiguous,
-  iso_8859_4_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_4_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index 87b7fb8a29de6b268fed0c6811f08897a665ca2b..1ca67e735f197be9971dc576a77616d798bcfe1b 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_5.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_5_TO_LOWER_CASE(c) EncISO_8859_5_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
-  ((EncISO_8859_5_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_5_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,80 +69,66 @@ static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_5_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x00a0, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0, 0x10e2, 0x10e2
+  0x0284, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x00a0, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0, 0x30e2, 0x30e2
 };
 
 static int
-iso_8859_5_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
-  }
-
+  *lower = ENC_ISO_8859_5_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
-iso_8859_5_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
   (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_5_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-    return (v != 0 ? TRUE : FALSE);
-  }
-  return FALSE;
+  v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_5_CTYPE(code, ctype);
@@ -150,120 +136,74 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xa1, 0xf1 },
+ { 0xa2, 0xf2 },
+ { 0xa3, 0xf3 },
+ { 0xa4, 0xf4 },
+ { 0xa5, 0xf5 },
+ { 0xa6, 0xf6 },
+ { 0xa7, 0xf7 },
+ { 0xa8, 0xf8 },
+ { 0xa9, 0xf9 },
+ { 0xaa, 0xfa },
+ { 0xab, 0xfb },
+ { 0xac, 0xfc },
+ { 0xae, 0xfe },
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xd0 },
+ { 0xb1, 0xd1 },
+ { 0xb2, 0xd2 },
+ { 0xb3, 0xd3 },
+ { 0xb4, 0xd4 },
+ { 0xb5, 0xd5 },
+ { 0xb6, 0xd6 },
+ { 0xb7, 0xd7 },
+ { 0xb8, 0xd8 },
+ { 0xb9, 0xd9 },
+ { 0xba, 0xda },
+ { 0xbb, 0xdb },
+ { 0xbc, 0xdc },
+ { 0xbd, 0xdd },
+ { 0xbe, 0xde },
+ { 0xbf, 0xdf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef }
+};
+
 static int
-iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xf1 },
-    { 0xa2, 0xf2 },
-    { 0xa3, 0xf3 },
-    { 0xa4, 0xf4 },
-    { 0xa5, 0xf5 },
-    { 0xa6, 0xf6 },
-    { 0xa7, 0xf7 },
-    { 0xa8, 0xf8 },
-    { 0xa9, 0xf9 },
-    { 0xaa, 0xfa },
-    { 0xab, 0xfb },
-    { 0xac, 0xfc },
-    { 0xae, 0xfe },
-    { 0xaf, 0xff },
-
-    { 0xb0, 0xd0 },
-    { 0xb1, 0xd1 },
-    { 0xb2, 0xd2 },
-    { 0xb3, 0xd3 },
-    { 0xb4, 0xd4 },
-    { 0xb5, 0xd5 },
-    { 0xb6, 0xd6 },
-    { 0xb7, 0xd7 },
-    { 0xb8, 0xd8 },
-    { 0xb9, 0xd9 },
-    { 0xba, 0xda },
-    { 0xbb, 0xdb },
-    { 0xbc, 0xdc },
-    { 0xbd, 0xdd },
-    { 0xbe, 0xdf },
-    { 0xbf, 0xdf },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xb0 },
-    { 0xd1, 0xb1 },
-    { 0xd2, 0xb2 },
-    { 0xd3, 0xb3 },
-    { 0xd4, 0xb4 },
-    { 0xd5, 0xb5 },
-    { 0xd6, 0xb6 },
-    { 0xd7, 0xb7 },
-    { 0xd8, 0xb8 },
-    { 0xd9, 0xb9 },
-    { 0xda, 0xba },
-    { 0xdb, 0xbb },
-    { 0xdc, 0xbc },
-    { 0xdd, 0xbd },
-    { 0xde, 0xbe },
-    { 0xdf, 0xbf },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf1, 0xa1 },
-    { 0xf2, 0xa2 },
-    { 0xf3, 0xa3 },
-    { 0xf4, 0xa4 },
-    { 0xf5, 0xa5 },
-    { 0xf6, 0xa6 },
-    { 0xf7, 0xa7 },
-    { 0xf8, 0xa8 },
-    { 0xf9, 0xa9 },
-    { 0xfa, 0xaa },
-    { 0xfb, 0xab },
-    { 0xfc, 0xac },
-    { 0xfe, 0xae },
-    { 0xff, 0xaf }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+                          const OnigUChar* p, const OnigUChar* end,
+                          OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_5 = {
@@ -271,25 +211,15 @@ OnigEncodingType OnigEncodingISO_8859_5 = {
   "ISO-8859-5",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_5_mbc_to_normalize,
-  iso_8859_5_is_mbc_ambiguous,
-  iso_8859_5_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  iso_8859_5_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index fffcd0e7d19d1ea230b9fac5cbd28495b426b475..ab42eeed3191bf3bb240d8c4d00a88de2e22e196 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_6.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "regenc.h"
 
 #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
-  ((EncISO_8859_6_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_6_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const unsigned short EncISO_8859_6_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -57,18 +57,18 @@ static const unsigned short EncISO_8859_6_CtypeTable[256] = {
   0x0000, 0x0000, 0x0000, 0x0000, 0x01a0, 0x01a0, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x01a0, 0x0000, 0x0000, 0x0000, 0x01a0,
-  0x0000, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+  0x0000, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
 };
 
 static int
-iso_8859_6_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_6_CTYPE(code, ctype);
@@ -81,24 +81,15 @@ OnigEncodingType OnigEncodingISO_8859_6 = {
   "ISO-8859-6",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  onigenc_ascii_mbc_to_normalize,
-  onigenc_ascii_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  iso_8859_6_is_code_ctype,
+  onigenc_ascii_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index e87661d84bb5ef6243eddf882ebc635233545836..1090064d7438381e10ad36a675b12db2fe46111e 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_7.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_7_TO_LOWER_CASE(c) EncISO_8859_7_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
-  ((EncISO_8859_7_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_7_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,87 +69,74 @@ static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_7_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0284, 0x01a0, 0x01a0, 0x00a0, 0x0000, 0x0000, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x0000, 0x01a0, 0x00a0, 0x01a0, 0x0000, 0x01a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x14a2, 0x01a0,
-  0x14a2, 0x14a2, 0x14a2, 0x01a0, 0x14a2, 0x10a0, 0x14a2, 0x14a2,
-  0x10e2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x0000, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x0000
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x00a0, 0x34a2, 0x01a0,
+  0x34a2, 0x34a2, 0x34a2, 0x01a0, 0x34a2, 0x10a0, 0x34a2, 0x34a2,
+  0x30e2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x0000, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x0000
 };
 
 static int
-iso_8859_7_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
-  }
+  *lower = ENC_ISO_8859_7_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
-iso_8859_7_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
   (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_7_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      if (*p == 0xc0 || *p == 0xe0)
-        return FALSE;
-      else
-        return TRUE;
-    }
-
-    return (v != 0 ? TRUE : FALSE);
+  v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    if (*p == 0xc0 || *p == 0xe0)
+      return FALSE;
+    else
+      return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_7_CTYPE(code, ctype);
@@ -157,121 +144,78 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xb6, 0xdc },
+ { 0xb8, 0xdd },
+ { 0xb9, 0xde },
+ { 0xba, 0xdf },
+ { 0xbc, 0xfc },
+ { 0xbe, 0xfd },
+ { 0xbf, 0xfe },
+
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb }
+};
+
 static int
-iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xb6, 0xdc },
-    { 0xb8, 0xdd },
-    { 0xb9, 0xde },
-    { 0xba, 0xdf },
-    { 0xbc, 0xfc },
-    { 0xbe, 0xfd },
-    { 0xbf, 0xfe },
-
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd7, 0xf7 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xb6 },
-    { 0xdd, 0xb8 },
-    { 0xde, 0xb9 },
-    { 0xdf, 0xba },
-
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf7, 0xd7 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xbc },
-    { 0xfd, 0xbe },
-    { 0xfe, 0xbf }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+   const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+            flag, p, end, items);
 }
 
+
 OnigEncodingType OnigEncodingISO_8859_7 = {
   onigenc_single_byte_mbc_enc_len,
   "ISO-8859-7",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_7_mbc_to_normalize,
-  iso_8859_7_is_mbc_ambiguous,
-  iso_8859_7_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  iso_8859_7_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index e76966c667f63b88be02941fd01a984c65cee995..fb9846f25f484ccffcae7745e71d79cbd5a2b6bf 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_8.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #include "regenc.h"
 
 #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
-  ((EncISO_8859_8_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const unsigned short EncISO_8859_8_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0284, 0x0000, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
   0x00a0, 0x10a0, 0x00a0, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01a0,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2, 0x10a2,
-  0x10a2, 0x10a2, 0x10a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2, 0x30a2,
+  0x30a2, 0x30a2, 0x30a2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
 };
 
 static int
-iso_8859_8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_8_CTYPE(code, ctype);
@@ -81,24 +81,15 @@ OnigEncodingType OnigEncodingISO_8859_8 = {
   "ISO-8859-8",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  onigenc_ascii_mbc_to_normalize,
-  onigenc_ascii_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  iso_8859_8_is_code_ctype,
+  onigenc_ascii_mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index 16a30c5f2479d0c1abc74a23e8b85d01198c717c..079d681c21c9da17176a9e90426152550771d582 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_9.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_ISO_8859_9_TO_LOWER_CASE(c) EncISO_8859_9_ToLowerCaseTable[c]
 #define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
-  ((EncISO_8859_9_CtypeTable[code] & ctype) != 0)
+  ((EncISO_8859_9_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,88 +69,86 @@ static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncISO_8859_9_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
-  0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x01a0, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
 };
 
 static int
-iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    *lower++ = 's';
+    *lower   = 's';
+    (*pp)++;
+    return 2;
   }
+
+  *lower = ENC_ISO_8859_9_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
-iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
-                           const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
-  (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncISO_8859_9_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-    if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-      /* 0xdf etc..  are lower case letter, but can't convert. */
-      if (*p == 0xdf || (*p >= 0xaa && *p <= 0xba))
-        return FALSE;
-      else
-        return TRUE;
-    }
+  if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    (*pp)++;
+    return TRUE;
+  }
 
-    return (v != 0 ? TRUE : FALSE);
+  (*pp)++;
+  v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  if ((v | BIT_CTYPE_LOWER) != 0) {
+    /* 0xdf etc..  are lower case letter, but can't convert. */
+    if (*p >= 0xaa && *p <= 0xba)
+      return FALSE;
+    else
+      return TRUE;
   }
-  return FALSE;
+
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
-iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_9_CTYPE(code, ctype);
@@ -158,86 +156,56 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe }
+};
+
 static int
-iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    const OnigPairAmbigCodes** ccs)
+apply_all_case_fold(OnigCaseFoldType flag,
+                   OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingISO_8859_9 = {
@@ -245,25 +213,15 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
   "ISO-8859-9",  /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_9_mbc_to_normalize,
-  iso_8859_9_is_mbc_ambiguous,
-  iso_8859_9_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_9_is_code_ctype,
+  mbc_case_fold,
+  apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index d7277e862e978e4fb3c157894b2901ad258d7a4d..c6649572f5ae8e81e0e30cea89832c77c60cd8fc 100644 (file)
@@ -2,7 +2,7 @@
   koi8.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_KOI8_TO_LOWER_CASE(c) EncKOI8_ToLowerCaseTable[c]
 #define ENC_IS_KOI8_CTYPE(code,ctype) \
-  ((EncKOI8_CtypeTable[code] & ctype) != 0)
+  ((EncKOI8_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncKOI8_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,22 +69,22 @@ static const UChar EncKOI8_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncKOI8_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -93,52 +93,46 @@ static const unsigned short EncKOI8_CtypeTable[256] = {
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
 };
 
+
 static int
-koi8_mbc_to_normalize(OnigAmbigType flag,
-                            const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower)
+koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+          const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
-  const OnigUChar* p = *pp;
+  const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_KOI8_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
-  }
+  *lower = ENC_KOI8_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
 koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
 {
   const OnigUChar* p = *pp;
 
   (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
+  if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
+      ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 &&
        !ONIGENC_IS_MBC_ASCII(p))) {
     int v = (EncKOI8_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
+             (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
     return (v != 0 ? TRUE : FALSE);
   }
   return FALSE;
 }
-
+#endif
 
 static int
 koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -149,89 +143,91 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
+ { 0xdf, 0xff },
+
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
+
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfe, 0xde },
+ { 0xff, 0xdf }
+};
+
 static int
-koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
-                              const OnigPairAmbigCodes** ccs)
+koi8_apply_all_case_fold(OnigCaseFoldType flag,
+                        OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd7, 0xf7 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-    { 0xdf, 0xff },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf7, 0xd7 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfe, 0xde },
-    { 0xff, 0xdf }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+koi8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingKOI8 = {
@@ -239,24 +235,14 @@ OnigEncodingType OnigEncodingKOI8 = {
   "KOI8",        /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  koi8_mbc_to_normalize,
-  koi8_is_mbc_ambiguous,
-  koi8_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  koi8_mbc_case_fold,
+  koi8_apply_all_case_fold,
+  koi8_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   koi8_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index 1010f5ff938e69430c8ce0350e0528ffaa1885ec..364dda15163b7c3dc8fe83067ae47978cfd06866 100644 (file)
@@ -2,7 +2,7 @@
   koi8_r.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,7 @@
 
 #define ENC_KOI8_R_TO_LOWER_CASE(c) EncKOI8_R_ToLowerCaseTable[c]
 #define ENC_IS_KOI8_R_CTYPE(code,ctype) \
-  ((EncKOI8_R_CtypeTable[code] & ctype) != 0)
+  ((EncKOI8_R_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 
 static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
@@ -69,75 +69,63 @@ static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
 };
 
 static const unsigned short EncKOI8_R_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x0284, 0x00a0, 0x00a0, 0x10a0, 0x01a0, 0x00a0,
-  0x00a0, 0x00a0, 0x00a0, 0x10e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x00a0, 0x30e2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x00a0, 0x14a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x00a0, 0x34a2, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2
 };
 
 static int
-koi8_r_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    *lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
-  }
-  else {
-    *lower = *p;
-  }
+  *lower = ENC_KOI8_R_TO_LOWER_CASE(*p);
   (*pp)++;
-  return 1; /* return byte length of converted char to lower */
+  return 1;
 }
 
+#if 0
 static int
-koi8_r_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
+  int v;
   const UChar* p = *pp;
 
   (*pp)++;
-  if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-       ONIGENC_IS_MBC_ASCII(p)) ||
-      ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-       !ONIGENC_IS_MBC_ASCII(p))) {
-    int v = (EncKOI8_R_CtypeTable[*p] &
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-    return (v != 0 ? TRUE : FALSE);
-  }
-  return FALSE;
+  v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+  return (v != 0 ? TRUE : FALSE);
 }
+#endif
 
 static int
 koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
@@ -148,92 +136,60 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
     return FALSE;
 }
 
+static const OnigPairCaseFoldCodes CaseFoldMap[] = {
+  { 0xa3, 0xb3 },
+
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
+
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd7, 0xf7 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe },
+  { 0xdf, 0xff }
+};
+
 static int
-koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                const OnigPairAmbigCodes** ccs)
+koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
+                              OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xa3, 0xb3 },
-    { 0xb3, 0xa3 },
-
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd7, 0xf7 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-    { 0xdf, 0xff },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf7, 0xd7 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfe, 0xde },
-    { 0xff, 0xdf }
-  };
+  return onigenc_apply_all_case_fold_with_map(
+             sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+             flag, f, arg);
+}
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return 52;
-  }
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
-  }
-  else
-    return 0;
+static int
+koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_get_case_fold_codes_by_str_with_map(
+            sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+            flag, p, end, items);
 }
 
 OnigEncodingType OnigEncodingKOI8_R = {
@@ -241,24 +197,14 @@ OnigEncodingType OnigEncodingKOI8_R = {
   "KOI8-R",       /* name */
   1,             /* max enc length */
   1,             /* min enc length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  koi8_r_mbc_to_normalize,
-  koi8_r_is_mbc_ambiguous,
-  koi8_r_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
+  koi8_r_mbc_case_fold,
+  koi8_r_apply_all_case_fold,
+  koi8_r_get_case_fold_codes_by_str,
+  onigenc_minimum_property_name_to_ctype,
   koi8_r_is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
index fcf057423cfc510ddbfa603d4c3b733ea0b00e49..285216ebdac1fc973edcbe73b54884346841e6a6 100644 (file)
@@ -2,7 +2,7 @@
   mktable.c
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <locale.h>
+
+#define __USE_ISOC99
+#include <ctype.h>
 
-#define NOT_RUBY
 #include "regenc.h"
 
-#define UNICODE_ISO_8859_1     0
-#define ISO_8859_1     1
-#define ISO_8859_2     2
-#define ISO_8859_3     3
-#define ISO_8859_4     4
-#define ISO_8859_5     5
-#define ISO_8859_6     6
-#define ISO_8859_7     7
-#define ISO_8859_8     8
-#define ISO_8859_9     9
-#define ISO_8859_10   10
-#define ISO_8859_11   11
-#define ISO_8859_13   12
-#define ISO_8859_14   13
-#define ISO_8859_15   14
-#define ISO_8859_16   15
-#define KOI8          16
-#define KOI8_R        17
+#define ASCII                0
+#define UNICODE_ISO_8859_1   1
+#define ISO_8859_1           2
+#define ISO_8859_2           3
+#define ISO_8859_3           4
+#define ISO_8859_4           5
+#define ISO_8859_5           6
+#define ISO_8859_6           7
+#define ISO_8859_7           8
+#define ISO_8859_8           9
+#define ISO_8859_9          10
+#define ISO_8859_10         11
+#define ISO_8859_11         12
+#define ISO_8859_13         13
+#define ISO_8859_14         14
+#define ISO_8859_15         15
+#define ISO_8859_16         16
+#define KOI8                17
+#define KOI8_R              18
 
 typedef struct {
   int   num;
@@ -58,6 +62,7 @@ typedef struct {
 } ENC_INFO;
 
 static ENC_INFO Info[] = {
+  { ASCII,               "ASCII" },
   { UNICODE_ISO_8859_1,  "UNICODE_ISO_8859_1"  },
   { ISO_8859_1,  "ISO_8859_1"  },
   { ISO_8859_2,  "ISO_8859_2"  },
@@ -81,6 +86,9 @@ static ENC_INFO Info[] = {
 
 static int IsAlpha(int enc, int c)
 {
+  if (enc == ASCII)
+    return isalpha(c);
+
   if (c >= 0x41 && c <= 0x5a) return 1;
   if (c >= 0x61 && c <= 0x7a) return 1;
 
@@ -255,6 +263,9 @@ static int IsAlpha(int enc, int c)
 
 static int IsBlank(int enc, int c)
 {
+  if (enc == ASCII)
+    return isblank(c);
+
   if (c == 0x09        || c == 0x20) return 1;
 
   switch (enc) {
@@ -291,6 +302,9 @@ static int IsBlank(int enc, int c)
 
 static int IsCntrl(int enc, int c)
 {
+  if (enc == ASCII)
+    return iscntrl(c);
+
   if (c >= 0x00        && c <= 0x1F) return 1;
 
   switch (enc) {
@@ -328,7 +342,7 @@ static int IsCntrl(int enc, int c)
   return 0;
 }
 
-static int IsDigit(int enc, int c)
+static int IsDigit(int enc ARG_UNUSED, int c)
 {
   if (c >= 0x30 && c <= 0x39) return 1;
   return 0;
@@ -336,6 +350,9 @@ static int IsDigit(int enc, int c)
 
 static int IsGraph(int enc, int c)
 {
+  if (enc == ASCII)
+    return isgraph(c);
+
   if (c >= 0x21 && c <= 0x7e) return 1;
 
   switch (enc) {
@@ -405,6 +422,9 @@ static int IsGraph(int enc, int c)
 
 static int IsLower(int enc, int c)
 {
+  if (enc == ASCII)
+    return islower(c);
+
   if (c >= 0x61 && c <= 0x7a) return 1;
 
   switch (enc) {
@@ -534,6 +554,9 @@ static int IsLower(int enc, int c)
 
 static int IsPrint(int enc, int c)
 {
+  if (enc == ASCII)
+    return isprint(c);
+
   if (c >= 0x20 && c <= 0x7e) return 1;
 
   switch (enc) {
@@ -609,6 +632,9 @@ static int IsPrint(int enc, int c)
 
 static int IsPunct(int enc, int c)
 {
+  if (enc == ASCII)
+    return ispunct(c);
+
   if (enc == UNICODE_ISO_8859_1) {
     if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
         c == 0x7c || c == 0x7e) return 1;
@@ -705,6 +731,9 @@ static int IsPunct(int enc, int c)
 
 static int IsSpace(int enc, int c)
 {
+  if (enc == ASCII)
+    return isspace(c);
+
   if (c >= 0x09 && c <= 0x0d) return 1;
   if (c == 0x20) return 1;
 
@@ -744,6 +773,9 @@ static int IsSpace(int enc, int c)
 
 static int IsUpper(int enc, int c)
 {
+  if (enc == ASCII)
+    return isupper(c);
+
   if (c >= 0x41 && c <= 0x5a) return 1;
 
   switch (enc) {
@@ -868,6 +900,9 @@ static int IsUpper(int enc, int c)
 
 static int IsXDigit(int enc, int c)
 {
+  if (enc == ASCII)
+    return isxdigit(c);
+
   if (c >= 0x30 && c <= 0x39) return 1;
   if (c >= 0x41 && c <= 0x46) return 1;
   if (c >= 0x61 && c <= 0x66) return 1;
@@ -876,6 +911,10 @@ static int IsXDigit(int enc, int c)
 
 static int IsWord(int enc, int c)
 {
+  if (enc == ASCII) {
+    return (isalpha(c) || isdigit(c) || c == 0x5f);
+  }
+
   if (c >= 0x30 && c <= 0x39) return 1;
   if (c >= 0x41 && c <= 0x5a) return 1;
   if (c == 0x5f) return 1;
@@ -1052,13 +1091,13 @@ static int IsWord(int enc, int c)
   return 0;
 }
 
-static int IsAscii(int enc, int c)
+static int IsAscii(int enc ARG_UNUSED, int c)
 {
   if (c >= 0x00 && c <= 0x7f) return 1;
   return 0;
 }
 
-static int IsNewline(int enc, int c)
+static int IsNewline(int enc ARG_UNUSED, int c)
 {
   if (c == 0x0a) return 1;
   return 0;
@@ -1072,25 +1111,25 @@ static int exec(FILE* fp, ENC_INFO* einfo)
 
   enc = einfo->num;
 
-  fprintf(fp, "static unsigned short Enc%s_CtypeTable[256] = {\n",
+  fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
          einfo->name);
 
   for (c = 0; c < 256; c++) {
     val = 0;
-    if (IsNewline(enc, c))  val |= ONIGENC_CTYPE_NEWLINE;
-    if (IsAlpha (enc, c))   val |= ONIGENC_CTYPE_ALPHA;
-    if (IsBlank (enc, c))   val |= ONIGENC_CTYPE_BLANK;
-    if (IsCntrl (enc, c))   val |= ONIGENC_CTYPE_CNTRL;
-    if (IsDigit (enc, c))   val |= ONIGENC_CTYPE_DIGIT;
-    if (IsGraph (enc, c))   val |= ONIGENC_CTYPE_GRAPH;
-    if (IsLower (enc, c))   val |= ONIGENC_CTYPE_LOWER;
-    if (IsPrint (enc, c))   val |= ONIGENC_CTYPE_PRINT;
-    if (IsPunct (enc, c))   val |= ONIGENC_CTYPE_PUNCT;
-    if (IsSpace (enc, c))   val |= ONIGENC_CTYPE_SPACE;
-    if (IsUpper (enc, c))   val |= ONIGENC_CTYPE_UPPER;
-    if (IsXDigit(enc, c))   val |= ONIGENC_CTYPE_XDIGIT;
-    if (IsWord  (enc, c))   val |= ONIGENC_CTYPE_WORD;
-    if (IsAscii (enc, c))   val |= ONIGENC_CTYPE_ASCII;
+    if (IsNewline(enc, c))  val |= BIT_CTYPE_NEWLINE;
+    if (IsAlpha (enc, c))   val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM);
+    if (IsBlank (enc, c))   val |= BIT_CTYPE_BLANK;
+    if (IsCntrl (enc, c))   val |= BIT_CTYPE_CNTRL;
+    if (IsDigit (enc, c))   val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM);
+    if (IsGraph (enc, c))   val |= BIT_CTYPE_GRAPH;
+    if (IsLower (enc, c))   val |= BIT_CTYPE_LOWER;
+    if (IsPrint (enc, c))   val |= BIT_CTYPE_PRINT;
+    if (IsPunct (enc, c))   val |= BIT_CTYPE_PUNCT;
+    if (IsSpace (enc, c))   val |= BIT_CTYPE_SPACE;
+    if (IsUpper (enc, c))   val |= BIT_CTYPE_UPPER;
+    if (IsXDigit(enc, c))   val |= BIT_CTYPE_XDIGIT;
+    if (IsWord  (enc, c))   val |= BIT_CTYPE_WORD;
+    if (IsAscii (enc, c))   val |= BIT_CTYPE_ASCII;
 
     if (c % NCOL == 0) fputs("  ", fp);
     fprintf(fp, "0x%04x", val);
@@ -1104,12 +1143,20 @@ static int exec(FILE* fp, ENC_INFO* einfo)
   return 0;
 }
 
-extern int main(int argc, char* argv[])
+extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
 {
   int i;
   FILE* fp = stdout;
 
-  for (i = 0; i < sizeof(Info)/sizeof(ENC_INFO); i++) {
+  setlocale(LC_ALL, "C");
+  /* setlocale(LC_ALL, "POSIX"); */
+  /* setlocale(LC_ALL, "en_GB.iso88591"); */
+  /* setlocale(LC_ALL, "de_BE.iso88591"); */
+  /* setlocale(LC_ALL, "fr_FR.iso88591"); */
+
+  for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
     exec(fp, &Info[i]);
   }
+
+  return 0;
 }
index f7d7d5226554015d82ba077cf3f1d9a3f6c3a40b..7a54c9fb3a5214ee5a94fd9af32b1e07b523aa77 100644 (file)
@@ -2,7 +2,7 @@
   sjis.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,7 +27,7 @@
  * SUCH DAMAGE.
  */
 
-#include "regenc.h"
+#include "regint.h"
 
 static const int EncLen_SJIS[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -71,13 +71,13 @@ static const char SJIS_CAN_BE_TRAIL_TABLE[256] = {
 #define SJIS_ISMB_TRAIL(byte)  SJIS_CAN_BE_TRAIL_TABLE[(byte)]
 
 static int
-sjis_mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p)
 {
   return EncLen_SJIS[*p];
 }
 
 static int
-sjis_code_to_mbclen(OnigCodePoint code)
+code_to_mbclen(OnigCodePoint code)
 {
   if (code < 256) {
     if (EncLen_SJIS[(int )code] == 1)
@@ -89,16 +89,16 @@ sjis_code_to_mbclen(OnigCodePoint code)
     return 2;
   }
   else
-    return 0;
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
 }
 
 static OnigCodePoint
-sjis_mbc_to_code(const UChar* p, const UChar* end)
+mbc_to_code(const UChar* p, const UChar* end)
 {
   int c, i, len;
   OnigCodePoint n;
 
-  len = enc_len(ONIG_ENCODING_SJIS, p);
+  len = enclen(ONIG_ENCODING_SJIS, p);
   c = *p++;
   n = c;
   if (len == 1) return n;
@@ -112,7 +112,7 @@ sjis_mbc_to_code(const UChar* p, const UChar* end)
 }
 
 static int
-sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
+code_to_mbc(OnigCodePoint code, UChar *buf)
 {
   UChar *p = buf;
 
@@ -120,67 +120,63 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf)
   *p++ = (UChar )(code & 0xff);
 
 #if 0
-  if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf))
-    return REGERR_INVALID_WIDE_CHAR_VALUE;
+  if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf))
+    return REGERR_INVALID_CODE_POINT_VALUE;
 #endif
   return p - buf;
 }
 
 static int
-sjis_mbc_to_normalize(OnigAmbigType flag,
-                     const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+             const UChar** pp, const UChar* end ARG_UNUSED, UChar* lower)
 {
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
-    }
-
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
     (*pp)++;
     return 1;
   }
   else {
-    int len = enc_len(ONIG_ENCODING_SJIS, p);
+    int i;
+    int len = enclen(ONIG_ENCODING_SJIS, p);
 
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
-      }
+    for (i = 0; i < len; i++) {
+      *lower++ = *p++;
     }
     (*pp) += len;
     return len; /* return byte length of converted char to lower */
   }
 }
 
+#if 0
 static int
-sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+                const UChar** pp, const UChar* end)
 {
   return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
                                       
 }
+#endif
 
+#if 0
 static int
-sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 128)
     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
   else {
-    if ((ctype & (ONIGENC_CTYPE_WORD |
-                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
-      return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
+    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+      return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
     }
   }
 
   return FALSE;
 }
+#endif
 
 static UChar*
-sjis_left_adjust_char_head(const UChar* start, const UChar* s)
+left_adjust_char_head(const UChar* start, const UChar* s)
 {
   const UChar *p;
   int len;
@@ -196,43 +192,127 @@ sjis_left_adjust_char_head(const UChar* start, const UChar* s)
       }
     } 
   }
-  len = enc_len(ONIG_ENCODING_SJIS, p);
+  len = enclen(ONIG_ENCODING_SJIS, p);
   if (p + len > s) return (UChar* )p;
   p += len;
   return (UChar* )(p + ((s - p) & ~1));
 }
 
 static int
-sjis_is_allowed_reverse_match(const UChar* s, const UChar* end)
+is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED)
 {
   const UChar c = *s;
   return (SJIS_ISMB_TRAIL(c) ? FALSE : TRUE);
 }
 
+
+static int PropertyInited = 0;
+static const OnigCodePoint** PropertyList;
+static int PropertyListNum;
+static int PropertyListSize;
+static hash_table_type* PropertyNameTable;
+
+static const OnigCodePoint CR_Hiragana[] = {
+  1,
+  0x829f, 0x82f1
+}; /* CR_Hiragana */
+
+static const OnigCodePoint CR_Katakana[] = {
+  4,
+  0x00a6, 0x00af,
+  0x00b1, 0x00dd,
+  0x8340, 0x837e,
+  0x8380, 0x8396,
+}; /* CR_Katakana */
+
+static int
+init_property_list(void)
+{
+  int r;
+
+  PROPERTY_LIST_ADD_PROP("Hiragana", CR_Hiragana);
+  PROPERTY_LIST_ADD_PROP("Katakana", CR_Katakana);
+  PropertyInited = 1;
+
+ end:
+  return r;
+}
+
+static int
+property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
+{
+  hash_data_type ctype;
+
+  PROPERTY_LIST_INIT_CHECK;
+
+  if (onig_st_lookup_strend(PropertyNameTable, p, end, &ctype) == 0) {
+    return onigenc_minimum_property_name_to_ctype(enc, p, end);
+  }
+
+  return (int )ctype;
+}
+
+static int
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    if (code < 128)
+      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+    else {
+      if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
+       return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
+      }
+    }
+  }
+  else {
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+    if (ctype >= (unsigned int )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
+  }
+
+  return FALSE;
+}
+
+static int
+get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
+                    const OnigCodePoint* ranges[])
+{
+  if (ctype <= ONIGENC_MAX_STD_CTYPE) {
+    return ONIG_NO_SUPPORT_CONFIG;
+  }
+  else {
+    *sb_out = 0x80;
+
+    PROPERTY_LIST_INIT_CHECK;
+
+    ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
+    if (ctype >= (OnigCtype )PropertyListNum)
+      return ONIGERR_TYPE_BUG;
+
+    *ranges = PropertyList[ctype];
+    return 0;
+  }
+}
+
 OnigEncodingType OnigEncodingSJIS = {
-  sjis_mbc_enc_len,
+  mbc_enc_len,
   "Shift_JIS",   /* name */
   2,             /* max byte length */
   1,             /* min byte length */
-  ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE,
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   onigenc_is_mbc_newline_0x0a,
-  sjis_mbc_to_code,
-  sjis_code_to_mbclen,
-  sjis_code_to_mbc,
-  sjis_mbc_to_normalize,
-  sjis_is_mbc_ambiguous,
-  onigenc_ascii_get_all_pair_ambig_codes,
-  onigenc_nothing_get_all_comp_ambig_codes,
-  sjis_is_code_ctype,
-  onigenc_not_support_get_ctype_code_range,
-  sjis_left_adjust_char_head,
-  sjis_is_allowed_reverse_match
+  mbc_to_code,
+  code_to_mbclen,
+  code_to_mbc,
+  mbc_case_fold,
+  onigenc_ascii_apply_all_case_fold,
+  onigenc_ascii_get_case_fold_codes_by_str,
+  property_name_to_ctype,
+  is_code_ctype,
+  get_ctype_code_range,
+  left_adjust_char_head,
+  is_allowed_reverse_match
 };
index a8cf539014ca0caf5f2d2b2a16a7db8373280420..af7a86e088203c2628b13eb2edf8b5484c6b69ff 100644 (file)
@@ -2,7 +2,7 @@
   unicode.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * SUCH DAMAGE.
  */
 
-#include "regenc.h"
+#include "regint.h"
 
+#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
+  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
+#if 0
+#define ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(code,cbit) \
+  ((EncUNICODE_ISO_8859_1_CtypeTable[code] & (cbit)) != 0)
+#endif
 
-const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
+static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x428c, 0x4289, 0x4288, 0x4288, 0x4288, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
-  0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
+  0x00a0, 0x00a0, 0x30e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
+  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x30e2, 0x00a0, 0x01a0,
+  0x00a0, 0x10a0, 0x30e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x00a0,
+  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x00a0,
+  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
 };
 
-static const OnigCodePoint CRAlnum[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  414,
-#else
-  9,
-#endif
-  0x0030, 0x0039,
+/* 'NEWLINE' */
+static const OnigCodePoint CR_NEWLINE[] = {
+  1,
+  0x000a, 0x000a
+}; /* CR_NEWLINE */
+
+/* 'Alpha': [[:Alpha:]] */
+static const OnigCodePoint CR_Alpha[] = {
+  418,
   0x0041, 0x005a,
   0x0061, 0x007a,
   0x00aa, 0x00aa,
@@ -79,15 +87,12 @@ static const OnigCodePoint CRAlnum[] = {
   0x00ba, 0x00ba,
   0x00c0, 0x00d6,
   0x00d8, 0x00f6,
-  0x00f8, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
+  0x00f8, 0x0241,
   0x0250, 0x02c1,
   0x02c6, 0x02d1,
   0x02e0, 0x02e4,
   0x02ee, 0x02ee,
-  0x0300, 0x0357,
-  0x035d, 0x036f,
+  0x0300, 0x036f,
   0x037a, 0x037a,
   0x0386, 0x0386,
   0x0388, 0x038a,
@@ -95,41 +100,39 @@ static const OnigCodePoint CRAlnum[] = {
   0x038e, 0x03a1,
   0x03a3, 0x03ce,
   0x03d0, 0x03f5,
-  0x03f7, 0x03fb,
-  0x0400, 0x0481,
+  0x03f7, 0x0481,
   0x0483, 0x0486,
   0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
+  0x04d0, 0x04f9,
   0x0500, 0x050f,
   0x0531, 0x0556,
   0x0559, 0x0559,
   0x0561, 0x0587,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
+  0x0591, 0x05b9,
   0x05bb, 0x05bd,
   0x05bf, 0x05bf,
   0x05c1, 0x05c2,
-  0x05c4, 0x05c4,
+  0x05c4, 0x05c5,
+  0x05c7, 0x05c7,
   0x05d0, 0x05ea,
   0x05f0, 0x05f2,
   0x0610, 0x0615,
   0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x0660, 0x0669,
+  0x0640, 0x065e,
   0x066e, 0x06d3,
   0x06d5, 0x06dc,
   0x06de, 0x06e8,
-  0x06ea, 0x06fc,
+  0x06ea, 0x06ef,
+  0x06fa, 0x06fc,
   0x06ff, 0x06ff,
   0x0710, 0x074a,
-  0x074d, 0x074f,
+  0x074d, 0x076d,
   0x0780, 0x07b1,
   0x0901, 0x0939,
   0x093c, 0x094d,
   0x0950, 0x0954,
   0x0958, 0x0963,
-  0x0966, 0x096f,
+  0x097d, 0x097d,
   0x0981, 0x0983,
   0x0985, 0x098c,
   0x098f, 0x0990,
@@ -139,11 +142,11 @@ static const OnigCodePoint CRAlnum[] = {
   0x09b6, 0x09b9,
   0x09bc, 0x09c4,
   0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
+  0x09cb, 0x09ce,
   0x09d7, 0x09d7,
   0x09dc, 0x09dd,
   0x09df, 0x09e3,
-  0x09e6, 0x09f1,
+  0x09f0, 0x09f1,
   0x0a01, 0x0a03,
   0x0a05, 0x0a0a,
   0x0a0f, 0x0a10,
@@ -158,7 +161,7 @@ static const OnigCodePoint CRAlnum[] = {
   0x0a4b, 0x0a4d,
   0x0a59, 0x0a5c,
   0x0a5e, 0x0a5e,
-  0x0a66, 0x0a74,
+  0x0a70, 0x0a74,
   0x0a81, 0x0a83,
   0x0a85, 0x0a8d,
   0x0a8f, 0x0a91,
@@ -171,7 +174,6 @@ static const OnigCodePoint CRAlnum[] = {
   0x0acb, 0x0acd,
   0x0ad0, 0x0ad0,
   0x0ae0, 0x0ae3,
-  0x0ae6, 0x0aef,
   0x0b01, 0x0b03,
   0x0b05, 0x0b0c,
   0x0b0f, 0x0b10,
@@ -185,7 +187,6 @@ static const OnigCodePoint CRAlnum[] = {
   0x0b56, 0x0b57,
   0x0b5c, 0x0b5d,
   0x0b5f, 0x0b61,
-  0x0b66, 0x0b6f,
   0x0b71, 0x0b71,
   0x0b82, 0x0b83,
   0x0b85, 0x0b8a,
@@ -196,13 +197,11 @@ static const OnigCodePoint CRAlnum[] = {
   0x0b9e, 0x0b9f,
   0x0ba3, 0x0ba4,
   0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
+  0x0bae, 0x0bb9,
   0x0bbe, 0x0bc2,
   0x0bc6, 0x0bc8,
   0x0bca, 0x0bcd,
   0x0bd7, 0x0bd7,
-  0x0be7, 0x0bef,
   0x0c01, 0x0c03,
   0x0c05, 0x0c0c,
   0x0c0e, 0x0c10,
@@ -214,7 +213,6 @@ static const OnigCodePoint CRAlnum[] = {
   0x0c4a, 0x0c4d,
   0x0c55, 0x0c56,
   0x0c60, 0x0c61,
-  0x0c66, 0x0c6f,
   0x0c82, 0x0c83,
   0x0c85, 0x0c8c,
   0x0c8e, 0x0c90,
@@ -227,7 +225,6 @@ static const OnigCodePoint CRAlnum[] = {
   0x0cd5, 0x0cd6,
   0x0cde, 0x0cde,
   0x0ce0, 0x0ce1,
-  0x0ce6, 0x0cef,
   0x0d02, 0x0d03,
   0x0d05, 0x0d0c,
   0x0d0e, 0x0d10,
@@ -238,7 +235,6 @@ static const OnigCodePoint CRAlnum[] = {
   0x0d4a, 0x0d4d,
   0x0d57, 0x0d57,
   0x0d60, 0x0d61,
-  0x0d66, 0x0d6f,
   0x0d82, 0x0d83,
   0x0d85, 0x0d96,
   0x0d9a, 0x0db1,
@@ -252,7 +248,6 @@ static const OnigCodePoint CRAlnum[] = {
   0x0df2, 0x0df3,
   0x0e01, 0x0e3a,
   0x0e40, 0x0e4e,
-  0x0e50, 0x0e59,
   0x0e81, 0x0e82,
   0x0e84, 0x0e84,
   0x0e87, 0x0e88,
@@ -269,11 +264,9 @@ static const OnigCodePoint CRAlnum[] = {
   0x0ec0, 0x0ec4,
   0x0ec6, 0x0ec6,
   0x0ec8, 0x0ecd,
-  0x0ed0, 0x0ed9,
   0x0edc, 0x0edd,
   0x0f00, 0x0f00,
   0x0f18, 0x0f19,
-  0x0f20, 0x0f29,
   0x0f35, 0x0f35,
   0x0f37, 0x0f37,
   0x0f39, 0x0f39,
@@ -289,39 +282,31 @@ static const OnigCodePoint CRAlnum[] = {
   0x1029, 0x102a,
   0x102c, 0x1032,
   0x1036, 0x1039,
-  0x1040, 0x1049,
   0x1050, 0x1059,
   0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
+  0x10d0, 0x10fa,
+  0x10fc, 0x10fc,
   0x1100, 0x1159,
   0x115f, 0x11a2,
   0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
+  0x1200, 0x1248,
   0x124a, 0x124d,
   0x1250, 0x1256,
   0x1258, 0x1258,
   0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
+  0x1260, 0x1288,
   0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
+  0x1290, 0x12b0,
   0x12b2, 0x12b5,
   0x12b8, 0x12be,
   0x12c0, 0x12c0,
   0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
   0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1369, 0x1371,
+  0x1318, 0x135a,
+  0x135f, 0x135f,
+  0x1380, 0x138f,
   0x13a0, 0x13f4,
   0x1401, 0x166c,
   0x166f, 0x1676,
@@ -338,17 +323,18 @@ static const OnigCodePoint CRAlnum[] = {
   0x17b6, 0x17d3,
   0x17d7, 0x17d7,
   0x17dc, 0x17dd,
-  0x17e0, 0x17e9,
   0x180b, 0x180d,
-  0x1810, 0x1819,
   0x1820, 0x1877,
   0x1880, 0x18a9,
   0x1900, 0x191c,
   0x1920, 0x192b,
   0x1930, 0x193b,
-  0x1946, 0x196d,
+  0x1950, 0x196d,
   0x1970, 0x1974,
-  0x1d00, 0x1d6b,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x1a00, 0x1a1b,
+  0x1d00, 0x1dc3,
   0x1e00, 0x1e9b,
   0x1ea0, 0x1ef9,
   0x1f00, 0x1f15,
@@ -372,7 +358,8 @@ static const OnigCodePoint CRAlnum[] = {
   0x1ff6, 0x1ffc,
   0x2071, 0x2071,
   0x207f, 0x207f,
-  0x20d0, 0x20ea,
+  0x2090, 0x2094,
+  0x20d0, 0x20eb,
   0x2102, 0x2102,
   0x2107, 0x2107,
   0x210a, 0x2113,
@@ -384,8 +371,23 @@ static const OnigCodePoint CRAlnum[] = {
   0x212a, 0x212d,
   0x212f, 0x2131,
   0x2133, 0x2139,
-  0x213d, 0x213f,
+  0x213c, 0x213f,
   0x2145, 0x2149,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2ce4,
+  0x2d00, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
   0x3005, 0x3006,
   0x302a, 0x302f,
   0x3031, 0x3035,
@@ -400,11 +402,13 @@ static const OnigCodePoint CRAlnum[] = {
   0x31a0, 0x31b7,
   0x31f0, 0x31ff,
   0x3400, 0x4db5,
-  0x4e00, 0x9fa5,
+  0x4e00, 0x9fbb,
   0xa000, 0xa48c,
+  0xa800, 0xa827,
   0xac00, 0xd7a3,
   0xf900, 0xfa2d,
   0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
   0xfb00, 0xfb06,
   0xfb13, 0xfb17,
   0xfb1d, 0xfb28,
@@ -422,7 +426,6 @@ static const OnigCodePoint CRAlnum[] = {
   0xfe20, 0xfe23,
   0xfe70, 0xfe74,
   0xfe76, 0xfefc,
-  0xff10, 0xff19,
   0xff21, 0xff3a,
   0xff41, 0xff5a,
   0xff66, 0xffbe,
@@ -440,19 +443,28 @@ static const OnigCodePoint CRAlnum[] = {
   0x10300, 0x1031e,
   0x10330, 0x10349,
   0x10380, 0x1039d,
+  0x103a0, 0x103c3,
+  0x103c8, 0x103cf,
   0x10400, 0x1049d,
-  0x104a0, 0x104a9,
   0x10800, 0x10805,
   0x10808, 0x10808,
   0x1080a, 0x10835,
   0x10837, 0x10838,
   0x1083c, 0x1083c,
   0x1083f, 0x1083f,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a3f,
   0x1d165, 0x1d169,
   0x1d16d, 0x1d172,
   0x1d17b, 0x1d182,
   0x1d185, 0x1d18b,
   0x1d1aa, 0x1d1ad,
+  0x1d242, 0x1d244,
   0x1d400, 0x1d454,
   0x1d456, 0x1d49c,
   0x1d49e, 0x1d49f,
@@ -471,7 +483,7 @@ static const OnigCodePoint CRAlnum[] = {
   0x1d540, 0x1d544,
   0x1d546, 0x1d546,
   0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
+  0x1d552, 0x1d6a5,
   0x1d6a8, 0x1d6c0,
   0x1d6c2, 0x1d6da,
   0x1d6dc, 0x1d6fa,
@@ -483,76 +495,117 @@ static const OnigCodePoint CRAlnum[] = {
   0x1d78a, 0x1d7a8,
   0x1d7aa, 0x1d7c2,
   0x1d7c4, 0x1d7c9,
-  0x1d7ce, 0x1d7ff,
   0x20000, 0x2a6d6,
   0x2f800, 0x2fa1d,
   0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRAlnum */
+}; /* CR_Alpha */
 
-static const OnigCodePoint CRAlpha[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  396,
-#else
-  8,
-#endif
-  0x0041, 0x005a,
-  0x0061, 0x007a,
-  0x00aa, 0x00aa,
-  0x00b5, 0x00b5,
-  0x00ba, 0x00ba,
-  0x00c0, 0x00d6,
-  0x00d8, 0x00f6,
-  0x00f8, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x02c1,
-  0x02c6, 0x02d1,
-  0x02e0, 0x02e4,
-  0x02ee, 0x02ee,
-  0x0300, 0x0357,
-  0x035d, 0x036f,
+/* 'Blank': [[:Blank:]] */
+static const OnigCodePoint CR_Blank[] = {
+  9,
+  0x0009, 0x0009,
+  0x0020, 0x0020,
+  0x00a0, 0x00a0,
+  0x1680, 0x1680,
+  0x180e, 0x180e,
+  0x2000, 0x200a,
+  0x202f, 0x202f,
+  0x205f, 0x205f,
+  0x3000, 0x3000
+}; /* CR_Blank */
+
+/* 'Cntrl': [[:Cntrl:]] */
+static const OnigCodePoint CR_Cntrl[] = {
+  19,
+  0x0000, 0x001f,
+  0x007f, 0x009f,
+  0x00ad, 0x00ad,
+  0x0600, 0x0603,
+  0x06dd, 0x06dd,
+  0x070f, 0x070f,
+  0x17b4, 0x17b5,
+  0x200b, 0x200f,
+  0x202a, 0x202e,
+  0x2060, 0x2063,
+  0x206a, 0x206f,
+  0xd800, 0xf8ff,
+  0xfeff, 0xfeff,
+  0xfff9, 0xfffb,
+  0x1d173, 0x1d17a,
+  0xe0001, 0xe0001,
+  0xe0020, 0xe007f,
+  0xf0000, 0xffffd,
+  0x100000, 0x10fffd
+}; /* CR_Cntrl */
+
+/* 'Digit': [[:Digit:]] */
+static const OnigCodePoint CR_Digit[] = {
+  23,
+  0x0030, 0x0039,
+  0x0660, 0x0669,
+  0x06f0, 0x06f9,
+  0x0966, 0x096f,
+  0x09e6, 0x09ef,
+  0x0a66, 0x0a6f,
+  0x0ae6, 0x0aef,
+  0x0b66, 0x0b6f,
+  0x0be6, 0x0bef,
+  0x0c66, 0x0c6f,
+  0x0ce6, 0x0cef,
+  0x0d66, 0x0d6f,
+  0x0e50, 0x0e59,
+  0x0ed0, 0x0ed9,
+  0x0f20, 0x0f29,
+  0x1040, 0x1049,
+  0x17e0, 0x17e9,
+  0x1810, 0x1819,
+  0x1946, 0x194f,
+  0x19d0, 0x19d9,
+  0xff10, 0xff19,
+  0x104a0, 0x104a9,
+  0x1d7ce, 0x1d7ff
+}; /* CR_Digit */
+
+/* 'Graph': [[:Graph:]] */
+static const OnigCodePoint CR_Graph[] = {
+  424,
+  0x0021, 0x007e,
+  0x00a1, 0x0241,
+  0x0250, 0x036f,
+  0x0374, 0x0375,
   0x037a, 0x037a,
-  0x0386, 0x0386,
-  0x0388, 0x038a,
+  0x037e, 0x037e,
+  0x0384, 0x038a,
   0x038c, 0x038c,
   0x038e, 0x03a1,
   0x03a3, 0x03ce,
-  0x03d0, 0x03f5,
-  0x03f7, 0x03fb,
-  0x0400, 0x0481,
-  0x0483, 0x0486,
+  0x03d0, 0x0486,
   0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
+  0x04d0, 0x04f9,
   0x0500, 0x050f,
   0x0531, 0x0556,
-  0x0559, 0x0559,
+  0x0559, 0x055f,
   0x0561, 0x0587,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05bd,
-  0x05bf, 0x05bf,
-  0x05c1, 0x05c2,
-  0x05c4, 0x05c4,
+  0x0589, 0x058a,
+  0x0591, 0x05b9,
+  0x05bb, 0x05c7,
   0x05d0, 0x05ea,
-  0x05f0, 0x05f2,
-  0x0610, 0x0615,
+  0x05f0, 0x05f4,
+  0x0600, 0x0603,
+  0x060b, 0x0615,
+  0x061b, 0x061b,
+  0x061e, 0x061f,
   0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x066e, 0x06d3,
-  0x06d5, 0x06dc,
-  0x06de, 0x06e8,
-  0x06ea, 0x06ef,
-  0x06fa, 0x06fc,
-  0x06ff, 0x06ff,
-  0x0710, 0x074a,
-  0x074d, 0x074f,
+  0x0640, 0x065e,
+  0x0660, 0x070d,
+  0x070f, 0x074a,
+  0x074d, 0x076d,
   0x0780, 0x07b1,
   0x0901, 0x0939,
   0x093c, 0x094d,
   0x0950, 0x0954,
-  0x0958, 0x0963,
+  0x0958, 0x0970,
+  0x097d, 0x097d,
   0x0981, 0x0983,
   0x0985, 0x098c,
   0x098f, 0x0990,
@@ -562,11 +615,11 @@ static const OnigCodePoint CRAlpha[] = {
   0x09b6, 0x09b9,
   0x09bc, 0x09c4,
   0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
+  0x09cb, 0x09ce,
   0x09d7, 0x09d7,
   0x09dc, 0x09dd,
   0x09df, 0x09e3,
-  0x09f0, 0x09f1,
+  0x09e6, 0x09fa,
   0x0a01, 0x0a03,
   0x0a05, 0x0a0a,
   0x0a0f, 0x0a10,
@@ -581,7 +634,7 @@ static const OnigCodePoint CRAlpha[] = {
   0x0a4b, 0x0a4d,
   0x0a59, 0x0a5c,
   0x0a5e, 0x0a5e,
-  0x0a70, 0x0a74,
+  0x0a66, 0x0a74,
   0x0a81, 0x0a83,
   0x0a85, 0x0a8d,
   0x0a8f, 0x0a91,
@@ -594,6 +647,8 @@ static const OnigCodePoint CRAlpha[] = {
   0x0acb, 0x0acd,
   0x0ad0, 0x0ad0,
   0x0ae0, 0x0ae3,
+  0x0ae6, 0x0aef,
+  0x0af1, 0x0af1,
   0x0b01, 0x0b03,
   0x0b05, 0x0b0c,
   0x0b0f, 0x0b10,
@@ -607,7 +662,7 @@ static const OnigCodePoint CRAlpha[] = {
   0x0b56, 0x0b57,
   0x0b5c, 0x0b5d,
   0x0b5f, 0x0b61,
-  0x0b71, 0x0b71,
+  0x0b66, 0x0b71,
   0x0b82, 0x0b83,
   0x0b85, 0x0b8a,
   0x0b8e, 0x0b90,
@@ -617,12 +672,12 @@ static const OnigCodePoint CRAlpha[] = {
   0x0b9e, 0x0b9f,
   0x0ba3, 0x0ba4,
   0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
+  0x0bae, 0x0bb9,
   0x0bbe, 0x0bc2,
   0x0bc6, 0x0bc8,
   0x0bca, 0x0bcd,
   0x0bd7, 0x0bd7,
+  0x0be6, 0x0bfa,
   0x0c01, 0x0c03,
   0x0c05, 0x0c0c,
   0x0c0e, 0x0c10,
@@ -634,6 +689,7 @@ static const OnigCodePoint CRAlpha[] = {
   0x0c4a, 0x0c4d,
   0x0c55, 0x0c56,
   0x0c60, 0x0c61,
+  0x0c66, 0x0c6f,
   0x0c82, 0x0c83,
   0x0c85, 0x0c8c,
   0x0c8e, 0x0c90,
@@ -646,6 +702,7 @@ static const OnigCodePoint CRAlpha[] = {
   0x0cd5, 0x0cd6,
   0x0cde, 0x0cde,
   0x0ce0, 0x0ce1,
+  0x0ce6, 0x0cef,
   0x0d02, 0x0d03,
   0x0d05, 0x0d0c,
   0x0d0e, 0x0d10,
@@ -656,6 +713,7 @@ static const OnigCodePoint CRAlpha[] = {
   0x0d4a, 0x0d4d,
   0x0d57, 0x0d57,
   0x0d60, 0x0d61,
+  0x0d66, 0x0d6f,
   0x0d82, 0x0d83,
   0x0d85, 0x0d96,
   0x0d9a, 0x0db1,
@@ -666,9 +724,9 @@ static const OnigCodePoint CRAlpha[] = {
   0x0dcf, 0x0dd4,
   0x0dd6, 0x0dd6,
   0x0dd8, 0x0ddf,
-  0x0df2, 0x0df3,
+  0x0df2, 0x0df4,
   0x0e01, 0x0e3a,
-  0x0e40, 0x0e4e,
+  0x0e3f, 0x0e5b,
   0x0e81, 0x0e82,
   0x0e84, 0x0e84,
   0x0e87, 0x0e88,
@@ -685,80 +743,74 @@ static const OnigCodePoint CRAlpha[] = {
   0x0ec0, 0x0ec4,
   0x0ec6, 0x0ec6,
   0x0ec8, 0x0ecd,
+  0x0ed0, 0x0ed9,
   0x0edc, 0x0edd,
-  0x0f00, 0x0f00,
-  0x0f18, 0x0f19,
-  0x0f35, 0x0f35,
-  0x0f37, 0x0f37,
-  0x0f39, 0x0f39,
-  0x0f3e, 0x0f47,
+  0x0f00, 0x0f47,
   0x0f49, 0x0f6a,
-  0x0f71, 0x0f84,
-  0x0f86, 0x0f8b,
+  0x0f71, 0x0f8b,
   0x0f90, 0x0f97,
   0x0f99, 0x0fbc,
-  0x0fc6, 0x0fc6,
+  0x0fbe, 0x0fcc,
+  0x0fcf, 0x0fd1,
   0x1000, 0x1021,
   0x1023, 0x1027,
   0x1029, 0x102a,
   0x102c, 0x1032,
   0x1036, 0x1039,
-  0x1050, 0x1059,
+  0x1040, 0x1059,
   0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
+  0x10d0, 0x10fc,
   0x1100, 0x1159,
   0x115f, 0x11a2,
   0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
+  0x1200, 0x1248,
   0x124a, 0x124d,
   0x1250, 0x1256,
   0x1258, 0x1258,
   0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
+  0x1260, 0x1288,
   0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
+  0x1290, 0x12b0,
   0x12b2, 0x12b5,
   0x12b8, 0x12be,
   0x12c0, 0x12c0,
   0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
   0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
+  0x1318, 0x135a,
+  0x135f, 0x137c,
+  0x1380, 0x1399,
   0x13a0, 0x13f4,
-  0x1401, 0x166c,
-  0x166f, 0x1676,
-  0x1681, 0x169a,
-  0x16a0, 0x16ea,
+  0x1401, 0x1676,
+  0x1681, 0x169c,
+  0x16a0, 0x16f0,
   0x1700, 0x170c,
   0x170e, 0x1714,
-  0x1720, 0x1734,
+  0x1720, 0x1736,
   0x1740, 0x1753,
   0x1760, 0x176c,
   0x176e, 0x1770,
   0x1772, 0x1773,
-  0x1780, 0x17b3,
-  0x17b6, 0x17d3,
-  0x17d7, 0x17d7,
-  0x17dc, 0x17dd,
-  0x180b, 0x180d,
+  0x1780, 0x17dd,
+  0x17e0, 0x17e9,
+  0x17f0, 0x17f9,
+  0x1800, 0x180d,
+  0x1810, 0x1819,
   0x1820, 0x1877,
   0x1880, 0x18a9,
   0x1900, 0x191c,
   0x1920, 0x192b,
   0x1930, 0x193b,
-  0x1950, 0x196d,
+  0x1940, 0x1940,
+  0x1944, 0x196d,
   0x1970, 0x1974,
-  0x1d00, 0x1d6b,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x19d0, 0x19d9,
+  0x19de, 0x1a1b,
+  0x1a1e, 0x1a1f,
+  0x1d00, 0x1dc3,
   0x1e00, 0x1e9b,
   0x1ea0, 0x1ef9,
   0x1f00, 0x1f15,
@@ -771,74 +823,111 @@ static const OnigCodePoint CRAlpha[] = {
   0x1f5d, 0x1f5d,
   0x1f5f, 0x1f7d,
   0x1f80, 0x1fb4,
-  0x1fb6, 0x1fbc,
-  0x1fbe, 0x1fbe,
-  0x1fc2, 0x1fc4,
-  0x1fc6, 0x1fcc,
-  0x1fd0, 0x1fd3,
+  0x1fb6, 0x1fc4,
+  0x1fc6, 0x1fd3,
   0x1fd6, 0x1fdb,
-  0x1fe0, 0x1fec,
+  0x1fdd, 0x1fef,
   0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffc,
-  0x2071, 0x2071,
-  0x207f, 0x207f,
-  0x20d0, 0x20ea,
-  0x2102, 0x2102,
-  0x2107, 0x2107,
-  0x210a, 0x2113,
-  0x2115, 0x2115,
-  0x2119, 0x211d,
-  0x2124, 0x2124,
-  0x2126, 0x2126,
-  0x2128, 0x2128,
-  0x212a, 0x212d,
-  0x212f, 0x2131,
-  0x2133, 0x2139,
-  0x213d, 0x213f,
-  0x2145, 0x2149,
-  0x3005, 0x3006,
-  0x302a, 0x302f,
-  0x3031, 0x3035,
-  0x303b, 0x303c,
+  0x1ff6, 0x1ffe,
+  0x200b, 0x2027,
+  0x202a, 0x202e,
+  0x2030, 0x205e,
+  0x2060, 0x2063,
+  0x206a, 0x2071,
+  0x2074, 0x208e,
+  0x2090, 0x2094,
+  0x20a0, 0x20b5,
+  0x20d0, 0x20eb,
+  0x2100, 0x214c,
+  0x2153, 0x2183,
+  0x2190, 0x23db,
+  0x2400, 0x2426,
+  0x2440, 0x244a,
+  0x2460, 0x269c,
+  0x26a0, 0x26b1,
+  0x2701, 0x2704,
+  0x2706, 0x2709,
+  0x270c, 0x2727,
+  0x2729, 0x274b,
+  0x274d, 0x274d,
+  0x274f, 0x2752,
+  0x2756, 0x2756,
+  0x2758, 0x275e,
+  0x2761, 0x2794,
+  0x2798, 0x27af,
+  0x27b1, 0x27be,
+  0x27c0, 0x27c6,
+  0x27d0, 0x27eb,
+  0x27f0, 0x2b13,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2cea,
+  0x2cf9, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
+  0x2e00, 0x2e17,
+  0x2e1c, 0x2e1d,
+  0x2e80, 0x2e99,
+  0x2e9b, 0x2ef3,
+  0x2f00, 0x2fd5,
+  0x2ff0, 0x2ffb,
+  0x3001, 0x303f,
   0x3041, 0x3096,
-  0x3099, 0x309a,
-  0x309d, 0x309f,
-  0x30a1, 0x30fa,
-  0x30fc, 0x30ff,
+  0x3099, 0x30ff,
   0x3105, 0x312c,
   0x3131, 0x318e,
-  0x31a0, 0x31b7,
-  0x31f0, 0x31ff,
-  0x3400, 0x4db5,
-  0x4e00, 0x9fa5,
-  0xa000, 0xa48c,
-  0xac00, 0xd7a3,
-  0xf900, 0xfa2d,
+  0x3190, 0x31b7,
+  0x31c0, 0x31cf,
+  0x31f0, 0x321e,
+  0x3220, 0x3243,
+  0x3250, 0x32fe,
+  0x3300, 0x4db5,
+  0x4dc0, 0x9fbb,
+  0xa000, 0xa48c,
+  0xa490, 0xa4c6,
+  0xa700, 0xa716,
+  0xa800, 0xa82b,
+  0xac00, 0xd7a3,
+  0xe000, 0xfa2d,
   0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
   0xfb00, 0xfb06,
   0xfb13, 0xfb17,
-  0xfb1d, 0xfb28,
-  0xfb2a, 0xfb36,
+  0xfb1d, 0xfb36,
   0xfb38, 0xfb3c,
   0xfb3e, 0xfb3e,
   0xfb40, 0xfb41,
   0xfb43, 0xfb44,
   0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3d,
+  0xfbd3, 0xfd3f,
   0xfd50, 0xfd8f,
   0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfb,
-  0xfe00, 0xfe0f,
+  0xfdf0, 0xfdfd,
+  0xfe00, 0xfe19,
   0xfe20, 0xfe23,
+  0xfe30, 0xfe52,
+  0xfe54, 0xfe66,
+  0xfe68, 0xfe6b,
   0xfe70, 0xfe74,
   0xfe76, 0xfefc,
-  0xff21, 0xff3a,
-  0xff41, 0xff5a,
-  0xff66, 0xffbe,
+  0xfeff, 0xfeff,
+  0xff01, 0xffbe,
   0xffc2, 0xffc7,
   0xffca, 0xffcf,
   0xffd2, 0xffd7,
   0xffda, 0xffdc,
+  0xffe0, 0xffe6,
+  0xffe8, 0xffee,
+  0xfff9, 0xfffd,
   0x10000, 0x1000b,
   0x1000d, 0x10026,
   0x10028, 0x1003a,
@@ -846,21 +935,36 @@ static const OnigCodePoint CRAlpha[] = {
   0x1003f, 0x1004d,
   0x10050, 0x1005d,
   0x10080, 0x100fa,
+  0x10100, 0x10102,
+  0x10107, 0x10133,
+  0x10137, 0x1018a,
   0x10300, 0x1031e,
-  0x10330, 0x10349,
+  0x10320, 0x10323,
+  0x10330, 0x1034a,
   0x10380, 0x1039d,
+  0x1039f, 0x103c3,
+  0x103c8, 0x103d5,
   0x10400, 0x1049d,
+  0x104a0, 0x104a9,
   0x10800, 0x10805,
   0x10808, 0x10808,
   0x1080a, 0x10835,
   0x10837, 0x10838,
   0x1083c, 0x1083c,
   0x1083f, 0x1083f,
-  0x1d165, 0x1d169,
-  0x1d16d, 0x1d172,
-  0x1d17b, 0x1d182,
-  0x1d185, 0x1d18b,
-  0x1d1aa, 0x1d1ad,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a47,
+  0x10a50, 0x10a58,
+  0x1d000, 0x1d0f5,
+  0x1d100, 0x1d126,
+  0x1d12a, 0x1d1dd,
+  0x1d200, 0x1d245,
+  0x1d300, 0x1d356,
   0x1d400, 0x1d454,
   0x1d456, 0x1d49c,
   0x1d49e, 0x1d49f,
@@ -879,538 +983,27 @@ static const OnigCodePoint CRAlpha[] = {
   0x1d540, 0x1d544,
   0x1d546, 0x1d546,
   0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d6c0,
-  0x1d6c2, 0x1d6da,
-  0x1d6dc, 0x1d6fa,
-  0x1d6fc, 0x1d714,
-  0x1d716, 0x1d734,
-  0x1d736, 0x1d74e,
-  0x1d750, 0x1d76e,
-  0x1d770, 0x1d788,
-  0x1d78a, 0x1d7a8,
-  0x1d7aa, 0x1d7c2,
-  0x1d7c4, 0x1d7c9,
+  0x1d552, 0x1d6a5,
+  0x1d6a8, 0x1d7c9,
+  0x1d7ce, 0x1d7ff,
   0x20000, 0x2a6d6,
   0x2f800, 0x2fa1d,
-  0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRAlpha */
-
-static const OnigCodePoint CRBlank[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  9,
-#else
-  3,
-#endif
-  0x0009, 0x0009,
-  0x0020, 0x0020,
-  0x00a0, 0x00a0
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x1680, 0x1680,
-  0x180e, 0x180e,
-  0x2000, 0x200a,
-  0x202f, 0x202f,
-  0x205f, 0x205f,
-  0x3000, 0x3000
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRBlank */
-
-static const OnigCodePoint CRCntrl[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  19,
-#else
-  3,
-#endif
-  0x0000, 0x001f,
-  0x007f, 0x009f,
-  0x00ad, 0x00ad
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0600, 0x0603,
-  0x06dd, 0x06dd,
-  0x070f, 0x070f,
-  0x17b4, 0x17b5,
-  0x200b, 0x200f,
-  0x202a, 0x202e,
-  0x2060, 0x2063,
-  0x206a, 0x206f,
-  0xd800, 0xf8ff,
-  0xfeff, 0xfeff,
-  0xfff9, 0xfffb,
-  0x1d173, 0x1d17a,
   0xe0001, 0xe0001,
   0xe0020, 0xe007f,
+  0xe0100, 0xe01ef,
   0xf0000, 0xffffd,
   0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRCntrl */
-
-static const OnigCodePoint CRDigit[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  23,
-#else
-  1,
-#endif
-  0x0030, 0x0039
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0660, 0x0669,
-  0x06f0, 0x06f9,
-  0x0966, 0x096f,
-  0x09e6, 0x09ef,
-  0x0a66, 0x0a6f,
-  0x0ae6, 0x0aef,
-  0x0b66, 0x0b6f,
-  0x0be7, 0x0bef,
-  0x0c66, 0x0c6f,
-  0x0ce6, 0x0cef,
-  0x0d66, 0x0d6f,
-  0x0e50, 0x0e59,
-  0x0ed0, 0x0ed9,
-  0x0f20, 0x0f29,
-  0x1040, 0x1049,
-  0x1369, 0x1371,
-  0x17e0, 0x17e9,
-  0x1810, 0x1819,
-  0x1946, 0x194f,
-  0xff10, 0xff19,
-  0x104a0, 0x104a9,
-  0x1d7ce, 0x1d7ff
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRDigit */
+}; /* CR_Graph */
 
-static const OnigCodePoint CRGraph[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  405,
-#else
-  2,
-#endif
-  0x0021, 0x007e,
-  0x00a1, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x0357,
-  0x035d, 0x036f,
-  0x0374, 0x0375,
-  0x037a, 0x037a,
-  0x037e, 0x037e,
-  0x0384, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x03a1,
-  0x03a3, 0x03ce,
-  0x03d0, 0x03fb,
-  0x0400, 0x0486,
-  0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
-  0x0500, 0x050f,
-  0x0531, 0x0556,
-  0x0559, 0x055f,
-  0x0561, 0x0587,
-  0x0589, 0x058a,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05c4,
-  0x05d0, 0x05ea,
-  0x05f0, 0x05f4,
-  0x0600, 0x0603,
-  0x060c, 0x0615,
-  0x061b, 0x061b,
-  0x061f, 0x061f,
-  0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x0660, 0x070d,
-  0x070f, 0x074a,
-  0x074d, 0x074f,
-  0x0780, 0x07b1,
-  0x0901, 0x0939,
-  0x093c, 0x094d,
-  0x0950, 0x0954,
-  0x0958, 0x0970,
-  0x0981, 0x0983,
-  0x0985, 0x098c,
-  0x098f, 0x0990,
-  0x0993, 0x09a8,
-  0x09aa, 0x09b0,
-  0x09b2, 0x09b2,
-  0x09b6, 0x09b9,
-  0x09bc, 0x09c4,
-  0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
-  0x09d7, 0x09d7,
-  0x09dc, 0x09dd,
-  0x09df, 0x09e3,
-  0x09e6, 0x09fa,
-  0x0a01, 0x0a03,
-  0x0a05, 0x0a0a,
-  0x0a0f, 0x0a10,
-  0x0a13, 0x0a28,
-  0x0a2a, 0x0a30,
-  0x0a32, 0x0a33,
-  0x0a35, 0x0a36,
-  0x0a38, 0x0a39,
-  0x0a3c, 0x0a3c,
-  0x0a3e, 0x0a42,
-  0x0a47, 0x0a48,
-  0x0a4b, 0x0a4d,
-  0x0a59, 0x0a5c,
-  0x0a5e, 0x0a5e,
-  0x0a66, 0x0a74,
-  0x0a81, 0x0a83,
-  0x0a85, 0x0a8d,
-  0x0a8f, 0x0a91,
-  0x0a93, 0x0aa8,
-  0x0aaa, 0x0ab0,
-  0x0ab2, 0x0ab3,
-  0x0ab5, 0x0ab9,
-  0x0abc, 0x0ac5,
-  0x0ac7, 0x0ac9,
-  0x0acb, 0x0acd,
-  0x0ad0, 0x0ad0,
-  0x0ae0, 0x0ae3,
-  0x0ae6, 0x0aef,
-  0x0af1, 0x0af1,
-  0x0b01, 0x0b03,
-  0x0b05, 0x0b0c,
-  0x0b0f, 0x0b10,
-  0x0b13, 0x0b28,
-  0x0b2a, 0x0b30,
-  0x0b32, 0x0b33,
-  0x0b35, 0x0b39,
-  0x0b3c, 0x0b43,
-  0x0b47, 0x0b48,
-  0x0b4b, 0x0b4d,
-  0x0b56, 0x0b57,
-  0x0b5c, 0x0b5d,
-  0x0b5f, 0x0b61,
-  0x0b66, 0x0b71,
-  0x0b82, 0x0b83,
-  0x0b85, 0x0b8a,
-  0x0b8e, 0x0b90,
-  0x0b92, 0x0b95,
-  0x0b99, 0x0b9a,
-  0x0b9c, 0x0b9c,
-  0x0b9e, 0x0b9f,
-  0x0ba3, 0x0ba4,
-  0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
-  0x0bbe, 0x0bc2,
-  0x0bc6, 0x0bc8,
-  0x0bca, 0x0bcd,
-  0x0bd7, 0x0bd7,
-  0x0be7, 0x0bfa,
-  0x0c01, 0x0c03,
-  0x0c05, 0x0c0c,
-  0x0c0e, 0x0c10,
-  0x0c12, 0x0c28,
-  0x0c2a, 0x0c33,
-  0x0c35, 0x0c39,
-  0x0c3e, 0x0c44,
-  0x0c46, 0x0c48,
-  0x0c4a, 0x0c4d,
-  0x0c55, 0x0c56,
-  0x0c60, 0x0c61,
-  0x0c66, 0x0c6f,
-  0x0c82, 0x0c83,
-  0x0c85, 0x0c8c,
-  0x0c8e, 0x0c90,
-  0x0c92, 0x0ca8,
-  0x0caa, 0x0cb3,
-  0x0cb5, 0x0cb9,
-  0x0cbc, 0x0cc4,
-  0x0cc6, 0x0cc8,
-  0x0cca, 0x0ccd,
-  0x0cd5, 0x0cd6,
-  0x0cde, 0x0cde,
-  0x0ce0, 0x0ce1,
-  0x0ce6, 0x0cef,
-  0x0d02, 0x0d03,
-  0x0d05, 0x0d0c,
-  0x0d0e, 0x0d10,
-  0x0d12, 0x0d28,
-  0x0d2a, 0x0d39,
-  0x0d3e, 0x0d43,
-  0x0d46, 0x0d48,
-  0x0d4a, 0x0d4d,
-  0x0d57, 0x0d57,
-  0x0d60, 0x0d61,
-  0x0d66, 0x0d6f,
-  0x0d82, 0x0d83,
-  0x0d85, 0x0d96,
-  0x0d9a, 0x0db1,
-  0x0db3, 0x0dbb,
-  0x0dbd, 0x0dbd,
-  0x0dc0, 0x0dc6,
-  0x0dca, 0x0dca,
-  0x0dcf, 0x0dd4,
-  0x0dd6, 0x0dd6,
-  0x0dd8, 0x0ddf,
-  0x0df2, 0x0df4,
-  0x0e01, 0x0e3a,
-  0x0e3f, 0x0e5b,
-  0x0e81, 0x0e82,
-  0x0e84, 0x0e84,
-  0x0e87, 0x0e88,
-  0x0e8a, 0x0e8a,
-  0x0e8d, 0x0e8d,
-  0x0e94, 0x0e97,
-  0x0e99, 0x0e9f,
-  0x0ea1, 0x0ea3,
-  0x0ea5, 0x0ea5,
-  0x0ea7, 0x0ea7,
-  0x0eaa, 0x0eab,
-  0x0ead, 0x0eb9,
-  0x0ebb, 0x0ebd,
-  0x0ec0, 0x0ec4,
-  0x0ec6, 0x0ec6,
-  0x0ec8, 0x0ecd,
-  0x0ed0, 0x0ed9,
-  0x0edc, 0x0edd,
-  0x0f00, 0x0f47,
-  0x0f49, 0x0f6a,
-  0x0f71, 0x0f8b,
-  0x0f90, 0x0f97,
-  0x0f99, 0x0fbc,
-  0x0fbe, 0x0fcc,
-  0x0fcf, 0x0fcf,
-  0x1000, 0x1021,
-  0x1023, 0x1027,
-  0x1029, 0x102a,
-  0x102c, 0x1032,
-  0x1036, 0x1039,
-  0x1040, 0x1059,
-  0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x10fb, 0x10fb,
-  0x1100, 0x1159,
-  0x115f, 0x11a2,
-  0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
-  0x124a, 0x124d,
-  0x1250, 0x1256,
-  0x1258, 0x1258,
-  0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
-  0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
-  0x12b2, 0x12b5,
-  0x12b8, 0x12be,
-  0x12c0, 0x12c0,
-  0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
-  0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1361, 0x137c,
-  0x13a0, 0x13f4,
-  0x1401, 0x1676,
-  0x1681, 0x169c,
-  0x16a0, 0x16f0,
-  0x1700, 0x170c,
-  0x170e, 0x1714,
-  0x1720, 0x1736,
-  0x1740, 0x1753,
-  0x1760, 0x176c,
-  0x176e, 0x1770,
-  0x1772, 0x1773,
-  0x1780, 0x17dd,
-  0x17e0, 0x17e9,
-  0x17f0, 0x17f9,
-  0x1800, 0x180d,
-  0x1810, 0x1819,
-  0x1820, 0x1877,
-  0x1880, 0x18a9,
-  0x1900, 0x191c,
-  0x1920, 0x192b,
-  0x1930, 0x193b,
-  0x1940, 0x1940,
-  0x1944, 0x196d,
-  0x1970, 0x1974,
-  0x19e0, 0x19ff,
-  0x1d00, 0x1d6b,
-  0x1e00, 0x1e9b,
-  0x1ea0, 0x1ef9,
-  0x1f00, 0x1f15,
-  0x1f18, 0x1f1d,
-  0x1f20, 0x1f45,
-  0x1f48, 0x1f4d,
-  0x1f50, 0x1f57,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f7d,
-  0x1f80, 0x1fb4,
-  0x1fb6, 0x1fc4,
-  0x1fc6, 0x1fd3,
-  0x1fd6, 0x1fdb,
-  0x1fdd, 0x1fef,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffe,
-  0x200b, 0x2027,
-  0x202a, 0x202e,
-  0x2030, 0x2054,
-  0x2057, 0x2057,
-  0x2060, 0x2063,
-  0x206a, 0x2071,
-  0x2074, 0x208e,
-  0x20a0, 0x20b1,
-  0x20d0, 0x20ea,
-  0x2100, 0x213b,
-  0x213d, 0x214b,
-  0x2153, 0x2183,
-  0x2190, 0x23d0,
-  0x2400, 0x2426,
-  0x2440, 0x244a,
-  0x2460, 0x2617,
-  0x2619, 0x267d,
-  0x2680, 0x2691,
-  0x26a0, 0x26a1,
-  0x2701, 0x2704,
-  0x2706, 0x2709,
-  0x270c, 0x2727,
-  0x2729, 0x274b,
-  0x274d, 0x274d,
-  0x274f, 0x2752,
-  0x2756, 0x2756,
-  0x2758, 0x275e,
-  0x2761, 0x2794,
-  0x2798, 0x27af,
-  0x27b1, 0x27be,
-  0x27d0, 0x27eb,
-  0x27f0, 0x2b0d,
-  0x2e80, 0x2e99,
-  0x2e9b, 0x2ef3,
-  0x2f00, 0x2fd5,
-  0x2ff0, 0x2ffb,
-  0x3001, 0x303f,
-  0x3041, 0x3096,
-  0x3099, 0x30ff,
-  0x3105, 0x312c,
-  0x3131, 0x318e,
-  0x3190, 0x31b7,
-  0x31f0, 0x321e,
-  0x3220, 0x3243,
-  0x3250, 0x327d,
-  0x327f, 0x32fe,
-  0x3300, 0x4db5,
-  0x4dc0, 0x9fa5,
-  0xa000, 0xa48c,
-  0xa490, 0xa4c6,
-  0xac00, 0xd7a3,
-  0xe000, 0xfa2d,
-  0xfa30, 0xfa6a,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xfb1d, 0xfb36,
-  0xfb38, 0xfb3c,
-  0xfb3e, 0xfb3e,
-  0xfb40, 0xfb41,
-  0xfb43, 0xfb44,
-  0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3f,
-  0xfd50, 0xfd8f,
-  0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfd,
-  0xfe00, 0xfe0f,
-  0xfe20, 0xfe23,
-  0xfe30, 0xfe52,
-  0xfe54, 0xfe66,
-  0xfe68, 0xfe6b,
-  0xfe70, 0xfe74,
-  0xfe76, 0xfefc,
-  0xfeff, 0xfeff,
-  0xff01, 0xffbe,
-  0xffc2, 0xffc7,
-  0xffca, 0xffcf,
-  0xffd2, 0xffd7,
-  0xffda, 0xffdc,
-  0xffe0, 0xffe6,
-  0xffe8, 0xffee,
-  0xfff9, 0xfffd,
-  0x10000, 0x1000b,
-  0x1000d, 0x10026,
-  0x10028, 0x1003a,
-  0x1003c, 0x1003d,
-  0x1003f, 0x1004d,
-  0x10050, 0x1005d,
-  0x10080, 0x100fa,
-  0x10100, 0x10102,
-  0x10107, 0x10133,
-  0x10137, 0x1013f,
-  0x10300, 0x1031e,
-  0x10320, 0x10323,
-  0x10330, 0x1034a,
-  0x10380, 0x1039d,
-  0x1039f, 0x1039f,
-  0x10400, 0x1049d,
-  0x104a0, 0x104a9,
-  0x10800, 0x10805,
-  0x10808, 0x10808,
-  0x1080a, 0x10835,
-  0x10837, 0x10838,
-  0x1083c, 0x1083c,
-  0x1083f, 0x1083f,
-  0x1d000, 0x1d0f5,
-  0x1d100, 0x1d126,
-  0x1d12a, 0x1d1dd,
-  0x1d300, 0x1d356,
-  0x1d400, 0x1d454,
-  0x1d456, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d51e, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d7c9,
-  0x1d7ce, 0x1d7ff,
-  0x20000, 0x2a6d6,
-  0x2f800, 0x2fa1d,
-  0xe0001, 0xe0001,
-  0xe0020, 0xe007f,
-  0xe0100, 0xe01ef,
-  0xf0000, 0xffffd,
-  0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRGraph */
-
-static const OnigCodePoint CRLower[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  424,
-#else
-  6,
-#endif
+/* 'Lower': [[:Lower:]] */
+static const OnigCodePoint CR_Lower[] = {
+  480,
   0x0061, 0x007a,
   0x00aa, 0x00aa,
   0x00b5, 0x00b5,
   0x00ba, 0x00ba,
   0x00df, 0x00f6,
-  0x00f8, 0x00ff
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
+  0x00f8, 0x00ff,
   0x0101, 0x0101,
   0x0103, 0x0103,
   0x0105, 0x0105,
@@ -1543,7 +1136,9 @@ static const OnigCodePoint CRLower[] = {
   0x022d, 0x022d,
   0x022f, 0x022f,
   0x0231, 0x0231,
-  0x0233, 0x0236,
+  0x0233, 0x0239,
+  0x023c, 0x023c,
+  0x023f, 0x0240,
   0x0250, 0x02af,
   0x0390, 0x0390,
   0x03ac, 0x03ce,
@@ -1563,7 +1158,7 @@ static const OnigCodePoint CRLower[] = {
   0x03ef, 0x03f3,
   0x03f5, 0x03f5,
   0x03f8, 0x03f8,
-  0x03fb, 0x03fb,
+  0x03fb, 0x03fc,
   0x0430, 0x045f,
   0x0461, 0x0461,
   0x0463, 0x0463,
@@ -1635,6 +1230,7 @@ static const OnigCodePoint CRLower[] = {
   0x04f1, 0x04f1,
   0x04f3, 0x04f3,
   0x04f5, 0x04f5,
+  0x04f7, 0x04f7,
   0x04f9, 0x04f9,
   0x0501, 0x0501,
   0x0503, 0x0503,
@@ -1646,7 +1242,8 @@ static const OnigCodePoint CRLower[] = {
   0x050f, 0x050f,
   0x0561, 0x0587,
   0x1d00, 0x1d2b,
-  0x1d62, 0x1d6b,
+  0x1d62, 0x1d77,
+  0x1d79, 0x1d9a,
   0x1e01, 0x1e01,
   0x1e03, 0x1e03,
   0x1e05, 0x1e05,
@@ -1796,8 +1393,60 @@ static const OnigCodePoint CRLower[] = {
   0x212f, 0x212f,
   0x2134, 0x2134,
   0x2139, 0x2139,
-  0x213d, 0x213d,
+  0x213c, 0x213d,
   0x2146, 0x2149,
+  0x2c30, 0x2c5e,
+  0x2c81, 0x2c81,
+  0x2c83, 0x2c83,
+  0x2c85, 0x2c85,
+  0x2c87, 0x2c87,
+  0x2c89, 0x2c89,
+  0x2c8b, 0x2c8b,
+  0x2c8d, 0x2c8d,
+  0x2c8f, 0x2c8f,
+  0x2c91, 0x2c91,
+  0x2c93, 0x2c93,
+  0x2c95, 0x2c95,
+  0x2c97, 0x2c97,
+  0x2c99, 0x2c99,
+  0x2c9b, 0x2c9b,
+  0x2c9d, 0x2c9d,
+  0x2c9f, 0x2c9f,
+  0x2ca1, 0x2ca1,
+  0x2ca3, 0x2ca3,
+  0x2ca5, 0x2ca5,
+  0x2ca7, 0x2ca7,
+  0x2ca9, 0x2ca9,
+  0x2cab, 0x2cab,
+  0x2cad, 0x2cad,
+  0x2caf, 0x2caf,
+  0x2cb1, 0x2cb1,
+  0x2cb3, 0x2cb3,
+  0x2cb5, 0x2cb5,
+  0x2cb7, 0x2cb7,
+  0x2cb9, 0x2cb9,
+  0x2cbb, 0x2cbb,
+  0x2cbd, 0x2cbd,
+  0x2cbf, 0x2cbf,
+  0x2cc1, 0x2cc1,
+  0x2cc3, 0x2cc3,
+  0x2cc5, 0x2cc5,
+  0x2cc7, 0x2cc7,
+  0x2cc9, 0x2cc9,
+  0x2ccb, 0x2ccb,
+  0x2ccd, 0x2ccd,
+  0x2ccf, 0x2ccf,
+  0x2cd1, 0x2cd1,
+  0x2cd3, 0x2cd3,
+  0x2cd5, 0x2cd5,
+  0x2cd7, 0x2cd7,
+  0x2cd9, 0x2cd9,
+  0x2cdb, 0x2cdb,
+  0x2cdd, 0x2cdd,
+  0x2cdf, 0x2cdf,
+  0x2ce1, 0x2ce1,
+  0x2ce3, 0x2ce4,
+  0x2d00, 0x2d25,
   0xfb00, 0xfb06,
   0xfb13, 0xfb17,
   0xff41, 0xff5a,
@@ -1818,7 +1467,7 @@ static const OnigCodePoint CRLower[] = {
   0x1d5ee, 0x1d607,
   0x1d622, 0x1d63b,
   0x1d656, 0x1d66f,
-  0x1d68a, 0x1d6a3,
+  0x1d68a, 0x1d6a5,
   0x1d6c2, 0x1d6da,
   0x1d6dc, 0x1d6e1,
   0x1d6fc, 0x1d714,
@@ -1829,23 +1478,16 @@ static const OnigCodePoint CRLower[] = {
   0x1d78a, 0x1d78f,
   0x1d7aa, 0x1d7c2,
   0x1d7c4, 0x1d7c9
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRLower */
+}; /* CR_Lower */
 
-static const OnigCodePoint CRPrint[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  405,
-#else
-  4,
-#endif
+/* 'Print': [[:Print:]] */
+static const OnigCodePoint CR_Print[] = {
+  423,
   0x0009, 0x000d,
   0x0020, 0x007e,
   0x0085, 0x0085,
-  0x00a0, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x0357,
-  0x035d, 0x036f,
+  0x00a0, 0x0241,
+  0x0250, 0x036f,
   0x0374, 0x0375,
   0x037a, 0x037a,
   0x037e, 0x037e,
@@ -1853,35 +1495,33 @@ static const OnigCodePoint CRPrint[] = {
   0x038c, 0x038c,
   0x038e, 0x03a1,
   0x03a3, 0x03ce,
-  0x03d0, 0x03fb,
-  0x0400, 0x0486,
+  0x03d0, 0x0486,
   0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
+  0x04d0, 0x04f9,
   0x0500, 0x050f,
   0x0531, 0x0556,
   0x0559, 0x055f,
   0x0561, 0x0587,
   0x0589, 0x058a,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05c4,
+  0x0591, 0x05b9,
+  0x05bb, 0x05c7,
   0x05d0, 0x05ea,
   0x05f0, 0x05f4,
   0x0600, 0x0603,
-  0x060c, 0x0615,
+  0x060b, 0x0615,
   0x061b, 0x061b,
-  0x061f, 0x061f,
+  0x061e, 0x061f,
   0x0621, 0x063a,
-  0x0640, 0x0658,
+  0x0640, 0x065e,
   0x0660, 0x070d,
   0x070f, 0x074a,
-  0x074d, 0x074f,
+  0x074d, 0x076d,
   0x0780, 0x07b1,
   0x0901, 0x0939,
   0x093c, 0x094d,
   0x0950, 0x0954,
   0x0958, 0x0970,
+  0x097d, 0x097d,
   0x0981, 0x0983,
   0x0985, 0x098c,
   0x098f, 0x0990,
@@ -1891,7 +1531,7 @@ static const OnigCodePoint CRPrint[] = {
   0x09b6, 0x09b9,
   0x09bc, 0x09c4,
   0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
+  0x09cb, 0x09ce,
   0x09d7, 0x09d7,
   0x09dc, 0x09dd,
   0x09df, 0x09e3,
@@ -1948,13 +1588,12 @@ static const OnigCodePoint CRPrint[] = {
   0x0b9e, 0x0b9f,
   0x0ba3, 0x0ba4,
   0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
+  0x0bae, 0x0bb9,
   0x0bbe, 0x0bc2,
   0x0bc6, 0x0bc8,
   0x0bca, 0x0bcd,
   0x0bd7, 0x0bd7,
-  0x0be7, 0x0bfa,
+  0x0be6, 0x0bfa,
   0x0c01, 0x0c03,
   0x0c05, 0x0c0c,
   0x0c0e, 0x0c10,
@@ -2028,7 +1667,7 @@ static const OnigCodePoint CRPrint[] = {
   0x0f90, 0x0f97,
   0x0f99, 0x0fbc,
   0x0fbe, 0x0fcc,
-  0x0fcf, 0x0fcf,
+  0x0fcf, 0x0fd1,
   0x1000, 0x1021,
   0x1023, 0x1027,
   0x1029, 0x102a,
@@ -2036,37 +1675,28 @@ static const OnigCodePoint CRPrint[] = {
   0x1036, 0x1039,
   0x1040, 0x1059,
   0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x10fb, 0x10fb,
+  0x10d0, 0x10fc,
   0x1100, 0x1159,
   0x115f, 0x11a2,
   0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
+  0x1200, 0x1248,
   0x124a, 0x124d,
   0x1250, 0x1256,
   0x1258, 0x1258,
   0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
+  0x1260, 0x1288,
   0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
+  0x1290, 0x12b0,
   0x12b2, 0x12b5,
   0x12b8, 0x12be,
   0x12c0, 0x12c0,
   0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
   0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1361, 0x137c,
+  0x1318, 0x135a,
+  0x135f, 0x137c,
+  0x1380, 0x1399,
   0x13a0, 0x13f4,
   0x1401, 0x1676,
   0x1680, 0x169c,
@@ -2091,8 +1721,12 @@ static const OnigCodePoint CRPrint[] = {
   0x1940, 0x1940,
   0x1944, 0x196d,
   0x1970, 0x1974,
-  0x19e0, 0x19ff,
-  0x1d00, 0x1d6b,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x19d0, 0x19d9,
+  0x19de, 0x1a1b,
+  0x1a1e, 0x1a1f,
+  0x1d00, 0x1dc3,
   0x1e00, 0x1e9b,
   0x1ea0, 0x1ef9,
   0x1f00, 0x1f15,
@@ -2111,23 +1745,19 @@ static const OnigCodePoint CRPrint[] = {
   0x1fdd, 0x1fef,
   0x1ff2, 0x1ff4,
   0x1ff6, 0x1ffe,
-  0x2000, 0x2054,
-  0x2057, 0x2057,
-  0x205f, 0x2063,
+  0x2000, 0x2063,
   0x206a, 0x2071,
   0x2074, 0x208e,
-  0x20a0, 0x20b1,
-  0x20d0, 0x20ea,
-  0x2100, 0x213b,
-  0x213d, 0x214b,
+  0x2090, 0x2094,
+  0x20a0, 0x20b5,
+  0x20d0, 0x20eb,
+  0x2100, 0x214c,
   0x2153, 0x2183,
-  0x2190, 0x23d0,
+  0x2190, 0x23db,
   0x2400, 0x2426,
   0x2440, 0x244a,
-  0x2460, 0x2617,
-  0x2619, 0x267d,
-  0x2680, 0x2691,
-  0x26a0, 0x26a1,
+  0x2460, 0x269c,
+  0x26a0, 0x26b1,
   0x2701, 0x2704,
   0x2706, 0x2709,
   0x270c, 0x2727,
@@ -2139,8 +1769,26 @@ static const OnigCodePoint CRPrint[] = {
   0x2761, 0x2794,
   0x2798, 0x27af,
   0x27b1, 0x27be,
+  0x27c0, 0x27c6,
   0x27d0, 0x27eb,
-  0x27f0, 0x2b0d,
+  0x27f0, 0x2b13,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2cea,
+  0x2cf9, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
+  0x2e00, 0x2e17,
+  0x2e1c, 0x2e1d,
   0x2e80, 0x2e99,
   0x2e9b, 0x2ef3,
   0x2f00, 0x2fd5,
@@ -2151,17 +1799,20 @@ static const OnigCodePoint CRPrint[] = {
   0x3105, 0x312c,
   0x3131, 0x318e,
   0x3190, 0x31b7,
+  0x31c0, 0x31cf,
   0x31f0, 0x321e,
   0x3220, 0x3243,
-  0x3250, 0x327d,
-  0x327f, 0x32fe,
+  0x3250, 0x32fe,
   0x3300, 0x4db5,
-  0x4dc0, 0x9fa5,
+  0x4dc0, 0x9fbb,
   0xa000, 0xa48c,
   0xa490, 0xa4c6,
+  0xa700, 0xa716,
+  0xa800, 0xa82b,
   0xac00, 0xd7a3,
   0xe000, 0xfa2d,
   0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
   0xfb00, 0xfb06,
   0xfb13, 0xfb17,
   0xfb1d, 0xfb36,
@@ -2174,7 +1825,7 @@ static const OnigCodePoint CRPrint[] = {
   0xfd50, 0xfd8f,
   0xfd92, 0xfdc7,
   0xfdf0, 0xfdfd,
-  0xfe00, 0xfe0f,
+  0xfe00, 0xfe19,
   0xfe20, 0xfe23,
   0xfe30, 0xfe52,
   0xfe54, 0xfe66,
@@ -2199,12 +1850,13 @@ static const OnigCodePoint CRPrint[] = {
   0x10080, 0x100fa,
   0x10100, 0x10102,
   0x10107, 0x10133,
-  0x10137, 0x1013f,
+  0x10137, 0x1018a,
   0x10300, 0x1031e,
   0x10320, 0x10323,
   0x10330, 0x1034a,
   0x10380, 0x1039d,
-  0x1039f, 0x1039f,
+  0x1039f, 0x103c3,
+  0x103c8, 0x103d5,
   0x10400, 0x1049d,
   0x104a0, 0x104a9,
   0x10800, 0x10805,
@@ -2213,9 +1865,18 @@ static const OnigCodePoint CRPrint[] = {
   0x10837, 0x10838,
   0x1083c, 0x1083c,
   0x1083f, 0x1083f,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a47,
+  0x10a50, 0x10a58,
   0x1d000, 0x1d0f5,
   0x1d100, 0x1d126,
   0x1d12a, 0x1d1dd,
+  0x1d200, 0x1d245,
   0x1d300, 0x1d356,
   0x1d400, 0x1d454,
   0x1d456, 0x1d49c,
@@ -2235,7 +1896,7 @@ static const OnigCodePoint CRPrint[] = {
   0x1d540, 0x1d544,
   0x1d546, 0x1d546,
   0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
+  0x1d552, 0x1d6a5,
   0x1d6a8, 0x1d7c9,
   0x1d7ce, 0x1d7ff,
   0x20000, 0x2a6d6,
@@ -2245,15 +1906,11 @@ static const OnigCodePoint CRPrint[] = {
   0xe0100, 0xe01ef,
   0xf0000, 0xffffd,
   0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRPrint */
+}; /* CR_Print */
 
-static const OnigCodePoint CRPunct[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  86,
-#else
-  14,
-#endif
+/* 'Punct': [[:Punct:]] */
+static const OnigCodePoint CR_Punct[] = {
+  96,
   0x0021, 0x0023,
   0x0025, 0x002a,
   0x002c, 0x002f,
@@ -2267,9 +1924,7 @@ static const OnigCodePoint CRPunct[] = {
   0x00ab, 0x00ab,
   0x00b7, 0x00b7,
   0x00bb, 0x00bb,
-  0x00bf, 0x00bf
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
+  0x00bf, 0x00bf,
   0x037e, 0x037e,
   0x0387, 0x0387,
   0x055a, 0x055f,
@@ -2277,10 +1932,11 @@ static const OnigCodePoint CRPunct[] = {
   0x05be, 0x05be,
   0x05c0, 0x05c0,
   0x05c3, 0x05c3,
+  0x05c6, 0x05c6,
   0x05f3, 0x05f4,
   0x060c, 0x060d,
   0x061b, 0x061b,
-  0x061f, 0x061f,
+  0x061e, 0x061f,
   0x066a, 0x066d,
   0x06d4, 0x06d4,
   0x0700, 0x070d,
@@ -2292,6 +1948,7 @@ static const OnigCodePoint CRPunct[] = {
   0x0f04, 0x0f12,
   0x0f3a, 0x0f3d,
   0x0f85, 0x0f85,
+  0x0fd0, 0x0fd1,
   0x104a, 0x104f,
   0x10fb, 0x10fb,
   0x1361, 0x1368,
@@ -2303,20 +1960,26 @@ static const OnigCodePoint CRPunct[] = {
   0x17d8, 0x17da,
   0x1800, 0x180a,
   0x1944, 0x1945,
+  0x19de, 0x19df,
+  0x1a1e, 0x1a1f,
   0x2010, 0x2027,
   0x2030, 0x2043,
   0x2045, 0x2051,
-  0x2053, 0x2054,
-  0x2057, 0x2057,
+  0x2053, 0x205e,
   0x207d, 0x207e,
   0x208d, 0x208e,
   0x2329, 0x232a,
   0x23b4, 0x23b6,
   0x2768, 0x2775,
+  0x27c5, 0x27c6,
   0x27e6, 0x27eb,
   0x2983, 0x2998,
   0x29d8, 0x29db,
   0x29fc, 0x29fd,
+  0x2cf9, 0x2cfc,
+  0x2cfe, 0x2cff,
+  0x2e00, 0x2e17,
+  0x2e1c, 0x2e1d,
   0x3001, 0x3003,
   0x3008, 0x3011,
   0x3014, 0x301f,
@@ -2325,6 +1988,7 @@ static const OnigCodePoint CRPunct[] = {
   0x30a0, 0x30a0,
   0x30fb, 0x30fb,
   0xfd3e, 0xfd3f,
+  0xfe10, 0xfe19,
   0xfe30, 0xfe52,
   0xfe54, 0xfe61,
   0xfe63, 0xfe63,
@@ -2341,22 +2005,17 @@ static const OnigCodePoint CRPunct[] = {
   0xff5d, 0xff5d,
   0xff5f, 0xff65,
   0x10100, 0x10101,
-  0x1039f, 0x1039f
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRPunct */
+  0x1039f, 0x1039f,
+  0x10a50, 0x10a58
+}; /* CR_Punct */
 
-static const OnigCodePoint CRSpace[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+/* 'Space': [[:Space:]] */
+static const OnigCodePoint CR_Space[] = {
   11,
-#else
-  4,
-#endif
   0x0009, 0x000d,
   0x0020, 0x0020,
   0x0085, 0x0085,
-  0x00a0, 0x00a0
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
+  0x00a0, 0x00a0,
   0x1680, 0x1680,
   0x180e, 0x180e,
   0x2000, 0x200a,
@@ -2364,20 +2023,14 @@ static const OnigCodePoint CRSpace[] = {
   0x202f, 0x202f,
   0x205f, 0x205f,
   0x3000, 0x3000
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRSpace */
+}; /* CR_Space */
 
-static const OnigCodePoint CRUpper[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  421,
-#else
-  3,
-#endif
+/* 'Upper': [[:Upper:]] */
+static const OnigCodePoint CR_Upper[] = {
+  476,
   0x0041, 0x005a,
   0x00c0, 0x00d6,
-  0x00d8, 0x00de
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
+  0x00d8, 0x00de,
   0x0100, 0x0100,
   0x0102, 0x0102,
   0x0104, 0x0104,
@@ -2511,6 +2164,9 @@ static const OnigCodePoint CRUpper[] = {
   0x022e, 0x022e,
   0x0230, 0x0230,
   0x0232, 0x0232,
+  0x023a, 0x023b,
+  0x023d, 0x023e,
+  0x0241, 0x0241,
   0x0386, 0x0386,
   0x0388, 0x038a,
   0x038c, 0x038c,
@@ -2533,7 +2189,7 @@ static const OnigCodePoint CRUpper[] = {
   0x03f4, 0x03f4,
   0x03f7, 0x03f7,
   0x03f9, 0x03fa,
-  0x0400, 0x042f,
+  0x03fd, 0x042f,
   0x0460, 0x0460,
   0x0462, 0x0462,
   0x0464, 0x0464,
@@ -2604,6 +2260,7 @@ static const OnigCodePoint CRUpper[] = {
   0x04f0, 0x04f0,
   0x04f2, 0x04f2,
   0x04f4, 0x04f4,
+  0x04f6, 0x04f6,
   0x04f8, 0x04f8,
   0x0500, 0x0500,
   0x0502, 0x0502,
@@ -2764,6 +2421,57 @@ static const OnigCodePoint CRUpper[] = {
   0x2133, 0x2133,
   0x213e, 0x213f,
   0x2145, 0x2145,
+  0x2c00, 0x2c2e,
+  0x2c80, 0x2c80,
+  0x2c82, 0x2c82,
+  0x2c84, 0x2c84,
+  0x2c86, 0x2c86,
+  0x2c88, 0x2c88,
+  0x2c8a, 0x2c8a,
+  0x2c8c, 0x2c8c,
+  0x2c8e, 0x2c8e,
+  0x2c90, 0x2c90,
+  0x2c92, 0x2c92,
+  0x2c94, 0x2c94,
+  0x2c96, 0x2c96,
+  0x2c98, 0x2c98,
+  0x2c9a, 0x2c9a,
+  0x2c9c, 0x2c9c,
+  0x2c9e, 0x2c9e,
+  0x2ca0, 0x2ca0,
+  0x2ca2, 0x2ca2,
+  0x2ca4, 0x2ca4,
+  0x2ca6, 0x2ca6,
+  0x2ca8, 0x2ca8,
+  0x2caa, 0x2caa,
+  0x2cac, 0x2cac,
+  0x2cae, 0x2cae,
+  0x2cb0, 0x2cb0,
+  0x2cb2, 0x2cb2,
+  0x2cb4, 0x2cb4,
+  0x2cb6, 0x2cb6,
+  0x2cb8, 0x2cb8,
+  0x2cba, 0x2cba,
+  0x2cbc, 0x2cbc,
+  0x2cbe, 0x2cbe,
+  0x2cc0, 0x2cc0,
+  0x2cc2, 0x2cc2,
+  0x2cc4, 0x2cc4,
+  0x2cc6, 0x2cc6,
+  0x2cc8, 0x2cc8,
+  0x2cca, 0x2cca,
+  0x2ccc, 0x2ccc,
+  0x2cce, 0x2cce,
+  0x2cd0, 0x2cd0,
+  0x2cd2, 0x2cd2,
+  0x2cd4, 0x2cd4,
+  0x2cd6, 0x2cd6,
+  0x2cd8, 0x2cd8,
+  0x2cda, 0x2cda,
+  0x2cdc, 0x2cdc,
+  0x2cde, 0x2cde,
+  0x2ce0, 0x2ce0,
+  0x2ce2, 0x2ce2,
   0xff21, 0xff3a,
   0x10400, 0x10427,
   0x1d400, 0x1d419,
@@ -2796,35 +2504,19 @@ static const OnigCodePoint CRUpper[] = {
   0x1d71c, 0x1d734,
   0x1d756, 0x1d76e,
   0x1d790, 0x1d7a8
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRUpper */
+}; /* CR_Upper */
 
-static const OnigCodePoint CRXDigit[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  3,
-#else
+/* 'XDigit': [[:XDigit:]] */
+static const OnigCodePoint CR_XDigit[] = {
   3,
-#endif
   0x0030, 0x0039,
   0x0041, 0x0046,
   0x0061, 0x0066
-};
+}; /* CR_XDigit */
 
-static const OnigCodePoint CRASCII[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  1,
-#else
-  1,
-#endif
-  0x0000, 0x007f
-};
-
-static const OnigCodePoint CRWord[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  436,
-#else
-  12,
-#endif
+/* 'Word': [[:Word:]] */
+static const OnigCodePoint CR_Word[] = {
+  464,
   0x0030, 0x0039,
   0x0041, 0x005a,
   0x005f, 0x005f,
@@ -2836,16 +2528,12 @@ static const OnigCodePoint CRWord[] = {
   0x00bc, 0x00be,
   0x00c0, 0x00d6,
   0x00d8, 0x00f6,
-#ifndef USE_UNICODE_FULL_RANGE_CTYPE
-  0x00f8, 0x7fffffff
-#else /* not USE_UNICODE_FULL_RANGE_CTYPE */
-  0x00f8, 0x0236,
+  0x00f8, 0x0241,
   0x0250, 0x02c1,
   0x02c6, 0x02d1,
   0x02e0, 0x02e4,
   0x02ee, 0x02ee,
-  0x0300, 0x0357,
-  0x035d, 0x036f,
+  0x0300, 0x036f,
   0x037a, 0x037a,
   0x0386, 0x0386,
   0x0388, 0x038a,
@@ -2853,27 +2541,25 @@ static const OnigCodePoint CRWord[] = {
   0x038e, 0x03a1,
   0x03a3, 0x03ce,
   0x03d0, 0x03f5,
-  0x03f7, 0x03fb,
-  0x0400, 0x0481,
+  0x03f7, 0x0481,
   0x0483, 0x0486,
   0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
+  0x04d0, 0x04f9,
   0x0500, 0x050f,
   0x0531, 0x0556,
   0x0559, 0x0559,
   0x0561, 0x0587,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
+  0x0591, 0x05b9,
   0x05bb, 0x05bd,
   0x05bf, 0x05bf,
   0x05c1, 0x05c2,
-  0x05c4, 0x05c4,
+  0x05c4, 0x05c5,
+  0x05c7, 0x05c7,
   0x05d0, 0x05ea,
   0x05f0, 0x05f2,
   0x0610, 0x0615,
   0x0621, 0x063a,
-  0x0640, 0x0658,
+  0x0640, 0x065e,
   0x0660, 0x0669,
   0x066e, 0x06d3,
   0x06d5, 0x06dc,
@@ -2881,13 +2567,14 @@ static const OnigCodePoint CRWord[] = {
   0x06ea, 0x06fc,
   0x06ff, 0x06ff,
   0x0710, 0x074a,
-  0x074d, 0x074f,
+  0x074d, 0x076d,
   0x0780, 0x07b1,
   0x0901, 0x0939,
   0x093c, 0x094d,
   0x0950, 0x0954,
   0x0958, 0x0963,
   0x0966, 0x096f,
+  0x097d, 0x097d,
   0x0981, 0x0983,
   0x0985, 0x098c,
   0x098f, 0x0990,
@@ -2897,7 +2584,7 @@ static const OnigCodePoint CRWord[] = {
   0x09b6, 0x09b9,
   0x09bc, 0x09c4,
   0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
+  0x09cb, 0x09ce,
   0x09d7, 0x09d7,
   0x09dc, 0x09dd,
   0x09df, 0x09e3,
@@ -2955,13 +2642,12 @@ static const OnigCodePoint CRWord[] = {
   0x0b9e, 0x0b9f,
   0x0ba3, 0x0ba4,
   0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
+  0x0bae, 0x0bb9,
   0x0bbe, 0x0bc2,
   0x0bc6, 0x0bc8,
   0x0bca, 0x0bcd,
   0x0bd7, 0x0bd7,
-  0x0be7, 0x0bf2,
+  0x0be6, 0x0bf2,
   0x0c01, 0x0c03,
   0x0c05, 0x0c0c,
   0x0c0e, 0x0c10,
@@ -3051,36 +2737,30 @@ static const OnigCodePoint CRWord[] = {
   0x1040, 0x1049,
   0x1050, 0x1059,
   0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
+  0x10d0, 0x10fa,
+  0x10fc, 0x10fc,
   0x1100, 0x1159,
   0x115f, 0x11a2,
   0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
+  0x1200, 0x1248,
   0x124a, 0x124d,
   0x1250, 0x1256,
   0x1258, 0x1258,
   0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
+  0x1260, 0x1288,
   0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
+  0x1290, 0x12b0,
   0x12b2, 0x12b5,
   0x12b8, 0x12be,
   0x12c0, 0x12c0,
   0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
   0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
+  0x1318, 0x135a,
+  0x135f, 0x135f,
   0x1369, 0x137c,
+  0x1380, 0x138f,
   0x13a0, 0x13f4,
   0x1401, 0x166c,
   0x166f, 0x1676,
@@ -3109,7 +2789,11 @@ static const OnigCodePoint CRWord[] = {
   0x1930, 0x193b,
   0x1946, 0x196d,
   0x1970, 0x1974,
-  0x1d00, 0x1d6b,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x19d0, 0x19d9,
+  0x1a00, 0x1a1b,
+  0x1d00, 0x1dc3,
   0x1e00, 0x1e9b,
   0x1ea0, 0x1ef9,
   0x1f00, 0x1f15,
@@ -3136,7 +2820,8 @@ static const OnigCodePoint CRWord[] = {
   0x2070, 0x2071,
   0x2074, 0x2079,
   0x207f, 0x2089,
-  0x20d0, 0x20ea,
+  0x2090, 0x2094,
+  0x20d0, 0x20eb,
   0x2102, 0x2102,
   0x2107, 0x2107,
   0x210a, 0x2113,
@@ -3148,12 +2833,28 @@ static const OnigCodePoint CRWord[] = {
   0x212a, 0x212d,
   0x212f, 0x2131,
   0x2133, 0x2139,
-  0x213d, 0x213f,
+  0x213c, 0x213f,
   0x2145, 0x2149,
   0x2153, 0x2183,
   0x2460, 0x249b,
   0x24ea, 0x24ff,
   0x2776, 0x2793,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2ce4,
+  0x2cfd, 0x2cfd,
+  0x2d00, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
   0x3005, 0x3007,
   0x3021, 0x302f,
   0x3031, 0x3035,
@@ -3161,7 +2862,8 @@ static const OnigCodePoint CRWord[] = {
   0x3041, 0x3096,
   0x3099, 0x309a,
   0x309d, 0x309f,
-  0x30a1, 0x30ff,
+  0x30a1, 0x30fa,
+  0x30fc, 0x30ff,
   0x3105, 0x312c,
   0x3131, 0x318e,
   0x3192, 0x3195,
@@ -3172,11 +2874,13 @@ static const OnigCodePoint CRWord[] = {
   0x3280, 0x3289,
   0x32b1, 0x32bf,
   0x3400, 0x4db5,
-  0x4e00, 0x9fa5,
+  0x4e00, 0x9fbb,
   0xa000, 0xa48c,
+  0xa800, 0xa827,
   0xac00, 0xd7a3,
   0xf900, 0xfa2d,
   0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
   0xfb00, 0xfb06,
   0xfb13, 0xfb17,
   0xfb1d, 0xfb28,
@@ -3200,7 +2904,7 @@ static const OnigCodePoint CRWord[] = {
   0xff21, 0xff3a,
   0xff3f, 0xff3f,
   0xff41, 0xff5a,
-  0xff65, 0xffbe,
+  0xff66, 0xffbe,
   0xffc2, 0xffc7,
   0xffca, 0xffcf,
   0xffd2, 0xffd7,
@@ -3213,10 +2917,15 @@ static const OnigCodePoint CRWord[] = {
   0x10050, 0x1005d,
   0x10080, 0x100fa,
   0x10107, 0x10133,
+  0x10140, 0x10178,
+  0x1018a, 0x1018a,
   0x10300, 0x1031e,
   0x10320, 0x10323,
   0x10330, 0x1034a,
   0x10380, 0x1039d,
+  0x103a0, 0x103c3,
+  0x103c8, 0x103cf,
+  0x103d1, 0x103d5,
   0x10400, 0x1049d,
   0x104a0, 0x104a9,
   0x10800, 0x10805,
@@ -3225,11 +2934,19 @@ static const OnigCodePoint CRWord[] = {
   0x10837, 0x10838,
   0x1083c, 0x1083c,
   0x1083f, 0x1083f,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a47,
   0x1d165, 0x1d169,
   0x1d16d, 0x1d172,
   0x1d17b, 0x1d182,
   0x1d185, 0x1d18b,
   0x1d1aa, 0x1d1ad,
+  0x1d242, 0x1d244,
   0x1d400, 0x1d454,
   0x1d456, 0x1d49c,
   0x1d49e, 0x1d49f,
@@ -3248,7 +2965,7 @@ static const OnigCodePoint CRWord[] = {
   0x1d540, 0x1d544,
   0x1d546, 0x1d546,
   0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
+  0x1d552, 0x1d6a5,
   0x1d6a8, 0x1d6c0,
   0x1d6c2, 0x1d6da,
   0x1d6dc, 0x1d6fa,
@@ -3264,140 +2981,8376 @@ static const OnigCodePoint CRWord[] = {
   0x20000, 0x2a6d6,
   0x2f800, 0x2fa1d,
   0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of CRWord */
-
-
-extern int
-onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
-  if (code < 256) {
-    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
-  }
-
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-
-  switch (ctype) {
-  case ONIGENC_CTYPE_ALPHA:
-    return onig_is_in_code_range((UChar* )CRAlpha, code);
-    break;
-  case ONIGENC_CTYPE_BLANK:
-    return onig_is_in_code_range((UChar* )CRBlank, code);
-    break;
-  case ONIGENC_CTYPE_CNTRL:
-    return onig_is_in_code_range((UChar* )CRCntrl, code);
-    break;
-  case ONIGENC_CTYPE_DIGIT:
-    return onig_is_in_code_range((UChar* )CRDigit, code);
-    break;
-  case ONIGENC_CTYPE_GRAPH:
-    return onig_is_in_code_range((UChar* )CRGraph, code);
-    break;
-  case ONIGENC_CTYPE_LOWER:
-    return onig_is_in_code_range((UChar* )CRLower, code);
-    break;
-  case ONIGENC_CTYPE_PRINT:
-    return onig_is_in_code_range((UChar* )CRPrint, code);
-    break;
-  case ONIGENC_CTYPE_PUNCT:
-    return onig_is_in_code_range((UChar* )CRPunct, code);
-    break;
-  case ONIGENC_CTYPE_SPACE:
-    return onig_is_in_code_range((UChar* )CRSpace, code);
-    break;
-  case ONIGENC_CTYPE_UPPER:
-    return onig_is_in_code_range((UChar* )CRUpper, code);
-    break;
-  case ONIGENC_CTYPE_XDIGIT:
-    return FALSE;
-    break;
-  case ONIGENC_CTYPE_WORD:
-    return onig_is_in_code_range((UChar* )CRWord, code);
-    break;
-  case ONIGENC_CTYPE_ASCII:
-    return FALSE;
-    break;
-  case ONIGENC_CTYPE_ALNUM:
-    return onig_is_in_code_range((UChar* )CRAlnum, code);
-    break;
-  case ONIGENC_CTYPE_NEWLINE:
-    return FALSE;
-    break;
-
-  default:
-    return ONIGENCERR_TYPE_BUG;
-    break;
-  }
-
-#else
-
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    return TRUE;
-  }
-  return FALSE;
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}
+}; /* CR_Word */
 
-extern int
-onigenc_unicode_get_ctype_code_range(int ctype,
-                 const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
-{
-  static const OnigCodePoint EmptyRange[] = { 0 };
-
-#define CR_SET(list) do { \
-  *mbr = list; \
-} while (0)
-
-  *sbr = EmptyRange;
-
-  switch (ctype) {
-  case ONIGENC_CTYPE_ALPHA:
-    CR_SET(CRAlpha);
-    break;
-  case ONIGENC_CTYPE_BLANK:
-    CR_SET(CRBlank);
-    break;
-  case ONIGENC_CTYPE_CNTRL:
-    CR_SET(CRCntrl);
-    break;
-  case ONIGENC_CTYPE_DIGIT:
-    CR_SET(CRDigit);
-    break;
-  case ONIGENC_CTYPE_GRAPH:
-    CR_SET(CRGraph);
-    break;
-  case ONIGENC_CTYPE_LOWER:
-    CR_SET(CRLower);
-    break;
-  case ONIGENC_CTYPE_PRINT:
-    CR_SET(CRPrint);
-    break;
-  case ONIGENC_CTYPE_PUNCT:
-    CR_SET(CRPunct);
-    break;
-  case ONIGENC_CTYPE_SPACE:
-    CR_SET(CRSpace);
-    break;
-  case ONIGENC_CTYPE_UPPER:
-    CR_SET(CRUpper);
-    break;
-  case ONIGENC_CTYPE_XDIGIT:
-    CR_SET(CRXDigit);
-    break;
-  case ONIGENC_CTYPE_WORD:
-    CR_SET(CRWord);
-    break;
-  case ONIGENC_CTYPE_ASCII:
-    CR_SET(CRASCII);
-    break;
-  case ONIGENC_CTYPE_ALNUM:
-    CR_SET(CRAlnum);
-    break;
-
-  default:
-    return ONIGENCERR_TYPE_BUG;
-    break;
-  }
-
-  return 0;
+/* 'Alnum': [[:Alnum:]] */
+static const OnigCodePoint CR_Alnum[] = {
+  436,
+  0x0030, 0x0039,
+  0x0041, 0x005a,
+  0x0061, 0x007a,
+  0x00aa, 0x00aa,
+  0x00b5, 0x00b5,
+  0x00ba, 0x00ba,
+  0x00c0, 0x00d6,
+  0x00d8, 0x00f6,
+  0x00f8, 0x0241,
+  0x0250, 0x02c1,
+  0x02c6, 0x02d1,
+  0x02e0, 0x02e4,
+  0x02ee, 0x02ee,
+  0x0300, 0x036f,
+  0x037a, 0x037a,
+  0x0386, 0x0386,
+  0x0388, 0x038a,
+  0x038c, 0x038c,
+  0x038e, 0x03a1,
+  0x03a3, 0x03ce,
+  0x03d0, 0x03f5,
+  0x03f7, 0x0481,
+  0x0483, 0x0486,
+  0x0488, 0x04ce,
+  0x04d0, 0x04f9,
+  0x0500, 0x050f,
+  0x0531, 0x0556,
+  0x0559, 0x0559,
+  0x0561, 0x0587,
+  0x0591, 0x05b9,
+  0x05bb, 0x05bd,
+  0x05bf, 0x05bf,
+  0x05c1, 0x05c2,
+  0x05c4, 0x05c5,
+  0x05c7, 0x05c7,
+  0x05d0, 0x05ea,
+  0x05f0, 0x05f2,
+  0x0610, 0x0615,
+  0x0621, 0x063a,
+  0x0640, 0x065e,
+  0x0660, 0x0669,
+  0x066e, 0x06d3,
+  0x06d5, 0x06dc,
+  0x06de, 0x06e8,
+  0x06ea, 0x06fc,
+  0x06ff, 0x06ff,
+  0x0710, 0x074a,
+  0x074d, 0x076d,
+  0x0780, 0x07b1,
+  0x0901, 0x0939,
+  0x093c, 0x094d,
+  0x0950, 0x0954,
+  0x0958, 0x0963,
+  0x0966, 0x096f,
+  0x097d, 0x097d,
+  0x0981, 0x0983,
+  0x0985, 0x098c,
+  0x098f, 0x0990,
+  0x0993, 0x09a8,
+  0x09aa, 0x09b0,
+  0x09b2, 0x09b2,
+  0x09b6, 0x09b9,
+  0x09bc, 0x09c4,
+  0x09c7, 0x09c8,
+  0x09cb, 0x09ce,
+  0x09d7, 0x09d7,
+  0x09dc, 0x09dd,
+  0x09df, 0x09e3,
+  0x09e6, 0x09f1,
+  0x0a01, 0x0a03,
+  0x0a05, 0x0a0a,
+  0x0a0f, 0x0a10,
+  0x0a13, 0x0a28,
+  0x0a2a, 0x0a30,
+  0x0a32, 0x0a33,
+  0x0a35, 0x0a36,
+  0x0a38, 0x0a39,
+  0x0a3c, 0x0a3c,
+  0x0a3e, 0x0a42,
+  0x0a47, 0x0a48,
+  0x0a4b, 0x0a4d,
+  0x0a59, 0x0a5c,
+  0x0a5e, 0x0a5e,
+  0x0a66, 0x0a74,
+  0x0a81, 0x0a83,
+  0x0a85, 0x0a8d,
+  0x0a8f, 0x0a91,
+  0x0a93, 0x0aa8,
+  0x0aaa, 0x0ab0,
+  0x0ab2, 0x0ab3,
+  0x0ab5, 0x0ab9,
+  0x0abc, 0x0ac5,
+  0x0ac7, 0x0ac9,
+  0x0acb, 0x0acd,
+  0x0ad0, 0x0ad0,
+  0x0ae0, 0x0ae3,
+  0x0ae6, 0x0aef,
+  0x0b01, 0x0b03,
+  0x0b05, 0x0b0c,
+  0x0b0f, 0x0b10,
+  0x0b13, 0x0b28,
+  0x0b2a, 0x0b30,
+  0x0b32, 0x0b33,
+  0x0b35, 0x0b39,
+  0x0b3c, 0x0b43,
+  0x0b47, 0x0b48,
+  0x0b4b, 0x0b4d,
+  0x0b56, 0x0b57,
+  0x0b5c, 0x0b5d,
+  0x0b5f, 0x0b61,
+  0x0b66, 0x0b6f,
+  0x0b71, 0x0b71,
+  0x0b82, 0x0b83,
+  0x0b85, 0x0b8a,
+  0x0b8e, 0x0b90,
+  0x0b92, 0x0b95,
+  0x0b99, 0x0b9a,
+  0x0b9c, 0x0b9c,
+  0x0b9e, 0x0b9f,
+  0x0ba3, 0x0ba4,
+  0x0ba8, 0x0baa,
+  0x0bae, 0x0bb9,
+  0x0bbe, 0x0bc2,
+  0x0bc6, 0x0bc8,
+  0x0bca, 0x0bcd,
+  0x0bd7, 0x0bd7,
+  0x0be6, 0x0bef,
+  0x0c01, 0x0c03,
+  0x0c05, 0x0c0c,
+  0x0c0e, 0x0c10,
+  0x0c12, 0x0c28,
+  0x0c2a, 0x0c33,
+  0x0c35, 0x0c39,
+  0x0c3e, 0x0c44,
+  0x0c46, 0x0c48,
+  0x0c4a, 0x0c4d,
+  0x0c55, 0x0c56,
+  0x0c60, 0x0c61,
+  0x0c66, 0x0c6f,
+  0x0c82, 0x0c83,
+  0x0c85, 0x0c8c,
+  0x0c8e, 0x0c90,
+  0x0c92, 0x0ca8,
+  0x0caa, 0x0cb3,
+  0x0cb5, 0x0cb9,
+  0x0cbc, 0x0cc4,
+  0x0cc6, 0x0cc8,
+  0x0cca, 0x0ccd,
+  0x0cd5, 0x0cd6,
+  0x0cde, 0x0cde,
+  0x0ce0, 0x0ce1,
+  0x0ce6, 0x0cef,
+  0x0d02, 0x0d03,
+  0x0d05, 0x0d0c,
+  0x0d0e, 0x0d10,
+  0x0d12, 0x0d28,
+  0x0d2a, 0x0d39,
+  0x0d3e, 0x0d43,
+  0x0d46, 0x0d48,
+  0x0d4a, 0x0d4d,
+  0x0d57, 0x0d57,
+  0x0d60, 0x0d61,
+  0x0d66, 0x0d6f,
+  0x0d82, 0x0d83,
+  0x0d85, 0x0d96,
+  0x0d9a, 0x0db1,
+  0x0db3, 0x0dbb,
+  0x0dbd, 0x0dbd,
+  0x0dc0, 0x0dc6,
+  0x0dca, 0x0dca,
+  0x0dcf, 0x0dd4,
+  0x0dd6, 0x0dd6,
+  0x0dd8, 0x0ddf,
+  0x0df2, 0x0df3,
+  0x0e01, 0x0e3a,
+  0x0e40, 0x0e4e,
+  0x0e50, 0x0e59,
+  0x0e81, 0x0e82,
+  0x0e84, 0x0e84,
+  0x0e87, 0x0e88,
+  0x0e8a, 0x0e8a,
+  0x0e8d, 0x0e8d,
+  0x0e94, 0x0e97,
+  0x0e99, 0x0e9f,
+  0x0ea1, 0x0ea3,
+  0x0ea5, 0x0ea5,
+  0x0ea7, 0x0ea7,
+  0x0eaa, 0x0eab,
+  0x0ead, 0x0eb9,
+  0x0ebb, 0x0ebd,
+  0x0ec0, 0x0ec4,
+  0x0ec6, 0x0ec6,
+  0x0ec8, 0x0ecd,
+  0x0ed0, 0x0ed9,
+  0x0edc, 0x0edd,
+  0x0f00, 0x0f00,
+  0x0f18, 0x0f19,
+  0x0f20, 0x0f29,
+  0x0f35, 0x0f35,
+  0x0f37, 0x0f37,
+  0x0f39, 0x0f39,
+  0x0f3e, 0x0f47,
+  0x0f49, 0x0f6a,
+  0x0f71, 0x0f84,
+  0x0f86, 0x0f8b,
+  0x0f90, 0x0f97,
+  0x0f99, 0x0fbc,
+  0x0fc6, 0x0fc6,
+  0x1000, 0x1021,
+  0x1023, 0x1027,
+  0x1029, 0x102a,
+  0x102c, 0x1032,
+  0x1036, 0x1039,
+  0x1040, 0x1049,
+  0x1050, 0x1059,
+  0x10a0, 0x10c5,
+  0x10d0, 0x10fa,
+  0x10fc, 0x10fc,
+  0x1100, 0x1159,
+  0x115f, 0x11a2,
+  0x11a8, 0x11f9,
+  0x1200, 0x1248,
+  0x124a, 0x124d,
+  0x1250, 0x1256,
+  0x1258, 0x1258,
+  0x125a, 0x125d,
+  0x1260, 0x1288,
+  0x128a, 0x128d,
+  0x1290, 0x12b0,
+  0x12b2, 0x12b5,
+  0x12b8, 0x12be,
+  0x12c0, 0x12c0,
+  0x12c2, 0x12c5,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
+  0x1312, 0x1315,
+  0x1318, 0x135a,
+  0x135f, 0x135f,
+  0x1380, 0x138f,
+  0x13a0, 0x13f4,
+  0x1401, 0x166c,
+  0x166f, 0x1676,
+  0x1681, 0x169a,
+  0x16a0, 0x16ea,
+  0x1700, 0x170c,
+  0x170e, 0x1714,
+  0x1720, 0x1734,
+  0x1740, 0x1753,
+  0x1760, 0x176c,
+  0x176e, 0x1770,
+  0x1772, 0x1773,
+  0x1780, 0x17b3,
+  0x17b6, 0x17d3,
+  0x17d7, 0x17d7,
+  0x17dc, 0x17dd,
+  0x17e0, 0x17e9,
+  0x180b, 0x180d,
+  0x1810, 0x1819,
+  0x1820, 0x1877,
+  0x1880, 0x18a9,
+  0x1900, 0x191c,
+  0x1920, 0x192b,
+  0x1930, 0x193b,
+  0x1946, 0x196d,
+  0x1970, 0x1974,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x19d0, 0x19d9,
+  0x1a00, 0x1a1b,
+  0x1d00, 0x1dc3,
+  0x1e00, 0x1e9b,
+  0x1ea0, 0x1ef9,
+  0x1f00, 0x1f15,
+  0x1f18, 0x1f1d,
+  0x1f20, 0x1f45,
+  0x1f48, 0x1f4d,
+  0x1f50, 0x1f57,
+  0x1f59, 0x1f59,
+  0x1f5b, 0x1f5b,
+  0x1f5d, 0x1f5d,
+  0x1f5f, 0x1f7d,
+  0x1f80, 0x1fb4,
+  0x1fb6, 0x1fbc,
+  0x1fbe, 0x1fbe,
+  0x1fc2, 0x1fc4,
+  0x1fc6, 0x1fcc,
+  0x1fd0, 0x1fd3,
+  0x1fd6, 0x1fdb,
+  0x1fe0, 0x1fec,
+  0x1ff2, 0x1ff4,
+  0x1ff6, 0x1ffc,
+  0x2071, 0x2071,
+  0x207f, 0x207f,
+  0x2090, 0x2094,
+  0x20d0, 0x20eb,
+  0x2102, 0x2102,
+  0x2107, 0x2107,
+  0x210a, 0x2113,
+  0x2115, 0x2115,
+  0x2119, 0x211d,
+  0x2124, 0x2124,
+  0x2126, 0x2126,
+  0x2128, 0x2128,
+  0x212a, 0x212d,
+  0x212f, 0x2131,
+  0x2133, 0x2139,
+  0x213c, 0x213f,
+  0x2145, 0x2149,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2ce4,
+  0x2d00, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
+  0x3005, 0x3006,
+  0x302a, 0x302f,
+  0x3031, 0x3035,
+  0x303b, 0x303c,
+  0x3041, 0x3096,
+  0x3099, 0x309a,
+  0x309d, 0x309f,
+  0x30a1, 0x30fa,
+  0x30fc, 0x30ff,
+  0x3105, 0x312c,
+  0x3131, 0x318e,
+  0x31a0, 0x31b7,
+  0x31f0, 0x31ff,
+  0x3400, 0x4db5,
+  0x4e00, 0x9fbb,
+  0xa000, 0xa48c,
+  0xa800, 0xa827,
+  0xac00, 0xd7a3,
+  0xf900, 0xfa2d,
+  0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
+  0xfb00, 0xfb06,
+  0xfb13, 0xfb17,
+  0xfb1d, 0xfb28,
+  0xfb2a, 0xfb36,
+  0xfb38, 0xfb3c,
+  0xfb3e, 0xfb3e,
+  0xfb40, 0xfb41,
+  0xfb43, 0xfb44,
+  0xfb46, 0xfbb1,
+  0xfbd3, 0xfd3d,
+  0xfd50, 0xfd8f,
+  0xfd92, 0xfdc7,
+  0xfdf0, 0xfdfb,
+  0xfe00, 0xfe0f,
+  0xfe20, 0xfe23,
+  0xfe70, 0xfe74,
+  0xfe76, 0xfefc,
+  0xff10, 0xff19,
+  0xff21, 0xff3a,
+  0xff41, 0xff5a,
+  0xff66, 0xffbe,
+  0xffc2, 0xffc7,
+  0xffca, 0xffcf,
+  0xffd2, 0xffd7,
+  0xffda, 0xffdc,
+  0x10000, 0x1000b,
+  0x1000d, 0x10026,
+  0x10028, 0x1003a,
+  0x1003c, 0x1003d,
+  0x1003f, 0x1004d,
+  0x10050, 0x1005d,
+  0x10080, 0x100fa,
+  0x10300, 0x1031e,
+  0x10330, 0x10349,
+  0x10380, 0x1039d,
+  0x103a0, 0x103c3,
+  0x103c8, 0x103cf,
+  0x10400, 0x1049d,
+  0x104a0, 0x104a9,
+  0x10800, 0x10805,
+  0x10808, 0x10808,
+  0x1080a, 0x10835,
+  0x10837, 0x10838,
+  0x1083c, 0x1083c,
+  0x1083f, 0x1083f,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a3f,
+  0x1d165, 0x1d169,
+  0x1d16d, 0x1d172,
+  0x1d17b, 0x1d182,
+  0x1d185, 0x1d18b,
+  0x1d1aa, 0x1d1ad,
+  0x1d242, 0x1d244,
+  0x1d400, 0x1d454,
+  0x1d456, 0x1d49c,
+  0x1d49e, 0x1d49f,
+  0x1d4a2, 0x1d4a2,
+  0x1d4a5, 0x1d4a6,
+  0x1d4a9, 0x1d4ac,
+  0x1d4ae, 0x1d4b9,
+  0x1d4bb, 0x1d4bb,
+  0x1d4bd, 0x1d4c3,
+  0x1d4c5, 0x1d505,
+  0x1d507, 0x1d50a,
+  0x1d50d, 0x1d514,
+  0x1d516, 0x1d51c,
+  0x1d51e, 0x1d539,
+  0x1d53b, 0x1d53e,
+  0x1d540, 0x1d544,
+  0x1d546, 0x1d546,
+  0x1d54a, 0x1d550,
+  0x1d552, 0x1d6a5,
+  0x1d6a8, 0x1d6c0,
+  0x1d6c2, 0x1d6da,
+  0x1d6dc, 0x1d6fa,
+  0x1d6fc, 0x1d714,
+  0x1d716, 0x1d734,
+  0x1d736, 0x1d74e,
+  0x1d750, 0x1d76e,
+  0x1d770, 0x1d788,
+  0x1d78a, 0x1d7a8,
+  0x1d7aa, 0x1d7c2,
+  0x1d7c4, 0x1d7c9,
+  0x1d7ce, 0x1d7ff,
+  0x20000, 0x2a6d6,
+  0x2f800, 0x2fa1d,
+  0xe0100, 0xe01ef
+}; /* CR_Alnum */
+
+/* 'ASCII': [[:ASCII:]] */
+static const OnigCodePoint CR_ASCII[] = {
+  1,
+  0x0000, 0x007f
+}; /* CR_ASCII */
+
+#ifdef USE_UNICODE_PROPERTIES
+
+/* 'Any': - */
+static const OnigCodePoint CR_Any[] = {
+  1,
+  0x0000, 0x10ffff
+}; /* CR_Any */
+
+/* 'Assigned': - */
+static const OnigCodePoint CR_Assigned[] = {
+  420,
+  0x0000, 0x0241,
+  0x0250, 0x036f,
+  0x0374, 0x0375,
+  0x037a, 0x037a,
+  0x037e, 0x037e,
+  0x0384, 0x038a,
+  0x038c, 0x038c,
+  0x038e, 0x03a1,
+  0x03a3, 0x03ce,
+  0x03d0, 0x0486,
+  0x0488, 0x04ce,
+  0x04d0, 0x04f9,
+  0x0500, 0x050f,
+  0x0531, 0x0556,
+  0x0559, 0x055f,
+  0x0561, 0x0587,
+  0x0589, 0x058a,
+  0x0591, 0x05b9,
+  0x05bb, 0x05c7,
+  0x05d0, 0x05ea,
+  0x05f0, 0x05f4,
+  0x0600, 0x0603,
+  0x060b, 0x0615,
+  0x061b, 0x061b,
+  0x061e, 0x061f,
+  0x0621, 0x063a,
+  0x0640, 0x065e,
+  0x0660, 0x070d,
+  0x070f, 0x074a,
+  0x074d, 0x076d,
+  0x0780, 0x07b1,
+  0x0901, 0x0939,
+  0x093c, 0x094d,
+  0x0950, 0x0954,
+  0x0958, 0x0970,
+  0x097d, 0x097d,
+  0x0981, 0x0983,
+  0x0985, 0x098c,
+  0x098f, 0x0990,
+  0x0993, 0x09a8,
+  0x09aa, 0x09b0,
+  0x09b2, 0x09b2,
+  0x09b6, 0x09b9,
+  0x09bc, 0x09c4,
+  0x09c7, 0x09c8,
+  0x09cb, 0x09ce,
+  0x09d7, 0x09d7,
+  0x09dc, 0x09dd,
+  0x09df, 0x09e3,
+  0x09e6, 0x09fa,
+  0x0a01, 0x0a03,
+  0x0a05, 0x0a0a,
+  0x0a0f, 0x0a10,
+  0x0a13, 0x0a28,
+  0x0a2a, 0x0a30,
+  0x0a32, 0x0a33,
+  0x0a35, 0x0a36,
+  0x0a38, 0x0a39,
+  0x0a3c, 0x0a3c,
+  0x0a3e, 0x0a42,
+  0x0a47, 0x0a48,
+  0x0a4b, 0x0a4d,
+  0x0a59, 0x0a5c,
+  0x0a5e, 0x0a5e,
+  0x0a66, 0x0a74,
+  0x0a81, 0x0a83,
+  0x0a85, 0x0a8d,
+  0x0a8f, 0x0a91,
+  0x0a93, 0x0aa8,
+  0x0aaa, 0x0ab0,
+  0x0ab2, 0x0ab3,
+  0x0ab5, 0x0ab9,
+  0x0abc, 0x0ac5,
+  0x0ac7, 0x0ac9,
+  0x0acb, 0x0acd,
+  0x0ad0, 0x0ad0,
+  0x0ae0, 0x0ae3,
+  0x0ae6, 0x0aef,
+  0x0af1, 0x0af1,
+  0x0b01, 0x0b03,
+  0x0b05, 0x0b0c,
+  0x0b0f, 0x0b10,
+  0x0b13, 0x0b28,
+  0x0b2a, 0x0b30,
+  0x0b32, 0x0b33,
+  0x0b35, 0x0b39,
+  0x0b3c, 0x0b43,
+  0x0b47, 0x0b48,
+  0x0b4b, 0x0b4d,
+  0x0b56, 0x0b57,
+  0x0b5c, 0x0b5d,
+  0x0b5f, 0x0b61,
+  0x0b66, 0x0b71,
+  0x0b82, 0x0b83,
+  0x0b85, 0x0b8a,
+  0x0b8e, 0x0b90,
+  0x0b92, 0x0b95,
+  0x0b99, 0x0b9a,
+  0x0b9c, 0x0b9c,
+  0x0b9e, 0x0b9f,
+  0x0ba3, 0x0ba4,
+  0x0ba8, 0x0baa,
+  0x0bae, 0x0bb9,
+  0x0bbe, 0x0bc2,
+  0x0bc6, 0x0bc8,
+  0x0bca, 0x0bcd,
+  0x0bd7, 0x0bd7,
+  0x0be6, 0x0bfa,
+  0x0c01, 0x0c03,
+  0x0c05, 0x0c0c,
+  0x0c0e, 0x0c10,
+  0x0c12, 0x0c28,
+  0x0c2a, 0x0c33,
+  0x0c35, 0x0c39,
+  0x0c3e, 0x0c44,
+  0x0c46, 0x0c48,
+  0x0c4a, 0x0c4d,
+  0x0c55, 0x0c56,
+  0x0c60, 0x0c61,
+  0x0c66, 0x0c6f,
+  0x0c82, 0x0c83,
+  0x0c85, 0x0c8c,
+  0x0c8e, 0x0c90,
+  0x0c92, 0x0ca8,
+  0x0caa, 0x0cb3,
+  0x0cb5, 0x0cb9,
+  0x0cbc, 0x0cc4,
+  0x0cc6, 0x0cc8,
+  0x0cca, 0x0ccd,
+  0x0cd5, 0x0cd6,
+  0x0cde, 0x0cde,
+  0x0ce0, 0x0ce1,
+  0x0ce6, 0x0cef,
+  0x0d02, 0x0d03,
+  0x0d05, 0x0d0c,
+  0x0d0e, 0x0d10,
+  0x0d12, 0x0d28,
+  0x0d2a, 0x0d39,
+  0x0d3e, 0x0d43,
+  0x0d46, 0x0d48,
+  0x0d4a, 0x0d4d,
+  0x0d57, 0x0d57,
+  0x0d60, 0x0d61,
+  0x0d66, 0x0d6f,
+  0x0d82, 0x0d83,
+  0x0d85, 0x0d96,
+  0x0d9a, 0x0db1,
+  0x0db3, 0x0dbb,
+  0x0dbd, 0x0dbd,
+  0x0dc0, 0x0dc6,
+  0x0dca, 0x0dca,
+  0x0dcf, 0x0dd4,
+  0x0dd6, 0x0dd6,
+  0x0dd8, 0x0ddf,
+  0x0df2, 0x0df4,
+  0x0e01, 0x0e3a,
+  0x0e3f, 0x0e5b,
+  0x0e81, 0x0e82,
+  0x0e84, 0x0e84,
+  0x0e87, 0x0e88,
+  0x0e8a, 0x0e8a,
+  0x0e8d, 0x0e8d,
+  0x0e94, 0x0e97,
+  0x0e99, 0x0e9f,
+  0x0ea1, 0x0ea3,
+  0x0ea5, 0x0ea5,
+  0x0ea7, 0x0ea7,
+  0x0eaa, 0x0eab,
+  0x0ead, 0x0eb9,
+  0x0ebb, 0x0ebd,
+  0x0ec0, 0x0ec4,
+  0x0ec6, 0x0ec6,
+  0x0ec8, 0x0ecd,
+  0x0ed0, 0x0ed9,
+  0x0edc, 0x0edd,
+  0x0f00, 0x0f47,
+  0x0f49, 0x0f6a,
+  0x0f71, 0x0f8b,
+  0x0f90, 0x0f97,
+  0x0f99, 0x0fbc,
+  0x0fbe, 0x0fcc,
+  0x0fcf, 0x0fd1,
+  0x1000, 0x1021,
+  0x1023, 0x1027,
+  0x1029, 0x102a,
+  0x102c, 0x1032,
+  0x1036, 0x1039,
+  0x1040, 0x1059,
+  0x10a0, 0x10c5,
+  0x10d0, 0x10fc,
+  0x1100, 0x1159,
+  0x115f, 0x11a2,
+  0x11a8, 0x11f9,
+  0x1200, 0x1248,
+  0x124a, 0x124d,
+  0x1250, 0x1256,
+  0x1258, 0x1258,
+  0x125a, 0x125d,
+  0x1260, 0x1288,
+  0x128a, 0x128d,
+  0x1290, 0x12b0,
+  0x12b2, 0x12b5,
+  0x12b8, 0x12be,
+  0x12c0, 0x12c0,
+  0x12c2, 0x12c5,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
+  0x1312, 0x1315,
+  0x1318, 0x135a,
+  0x135f, 0x137c,
+  0x1380, 0x1399,
+  0x13a0, 0x13f4,
+  0x1401, 0x1676,
+  0x1680, 0x169c,
+  0x16a0, 0x16f0,
+  0x1700, 0x170c,
+  0x170e, 0x1714,
+  0x1720, 0x1736,
+  0x1740, 0x1753,
+  0x1760, 0x176c,
+  0x176e, 0x1770,
+  0x1772, 0x1773,
+  0x1780, 0x17dd,
+  0x17e0, 0x17e9,
+  0x17f0, 0x17f9,
+  0x1800, 0x180e,
+  0x1810, 0x1819,
+  0x1820, 0x1877,
+  0x1880, 0x18a9,
+  0x1900, 0x191c,
+  0x1920, 0x192b,
+  0x1930, 0x193b,
+  0x1940, 0x1940,
+  0x1944, 0x196d,
+  0x1970, 0x1974,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x19d0, 0x19d9,
+  0x19de, 0x1a1b,
+  0x1a1e, 0x1a1f,
+  0x1d00, 0x1dc3,
+  0x1e00, 0x1e9b,
+  0x1ea0, 0x1ef9,
+  0x1f00, 0x1f15,
+  0x1f18, 0x1f1d,
+  0x1f20, 0x1f45,
+  0x1f48, 0x1f4d,
+  0x1f50, 0x1f57,
+  0x1f59, 0x1f59,
+  0x1f5b, 0x1f5b,
+  0x1f5d, 0x1f5d,
+  0x1f5f, 0x1f7d,
+  0x1f80, 0x1fb4,
+  0x1fb6, 0x1fc4,
+  0x1fc6, 0x1fd3,
+  0x1fd6, 0x1fdb,
+  0x1fdd, 0x1fef,
+  0x1ff2, 0x1ff4,
+  0x1ff6, 0x1ffe,
+  0x2000, 0x2063,
+  0x206a, 0x2071,
+  0x2074, 0x208e,
+  0x2090, 0x2094,
+  0x20a0, 0x20b5,
+  0x20d0, 0x20eb,
+  0x2100, 0x214c,
+  0x2153, 0x2183,
+  0x2190, 0x23db,
+  0x2400, 0x2426,
+  0x2440, 0x244a,
+  0x2460, 0x269c,
+  0x26a0, 0x26b1,
+  0x2701, 0x2704,
+  0x2706, 0x2709,
+  0x270c, 0x2727,
+  0x2729, 0x274b,
+  0x274d, 0x274d,
+  0x274f, 0x2752,
+  0x2756, 0x2756,
+  0x2758, 0x275e,
+  0x2761, 0x2794,
+  0x2798, 0x27af,
+  0x27b1, 0x27be,
+  0x27c0, 0x27c6,
+  0x27d0, 0x27eb,
+  0x27f0, 0x2b13,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2cea,
+  0x2cf9, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
+  0x2e00, 0x2e17,
+  0x2e1c, 0x2e1d,
+  0x2e80, 0x2e99,
+  0x2e9b, 0x2ef3,
+  0x2f00, 0x2fd5,
+  0x2ff0, 0x2ffb,
+  0x3000, 0x303f,
+  0x3041, 0x3096,
+  0x3099, 0x30ff,
+  0x3105, 0x312c,
+  0x3131, 0x318e,
+  0x3190, 0x31b7,
+  0x31c0, 0x31cf,
+  0x31f0, 0x321e,
+  0x3220, 0x3243,
+  0x3250, 0x32fe,
+  0x3300, 0x4db5,
+  0x4dc0, 0x9fbb,
+  0xa000, 0xa48c,
+  0xa490, 0xa4c6,
+  0xa700, 0xa716,
+  0xa800, 0xa82b,
+  0xac00, 0xd7a3,
+  0xd800, 0xfa2d,
+  0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
+  0xfb00, 0xfb06,
+  0xfb13, 0xfb17,
+  0xfb1d, 0xfb36,
+  0xfb38, 0xfb3c,
+  0xfb3e, 0xfb3e,
+  0xfb40, 0xfb41,
+  0xfb43, 0xfb44,
+  0xfb46, 0xfbb1,
+  0xfbd3, 0xfd3f,
+  0xfd50, 0xfd8f,
+  0xfd92, 0xfdc7,
+  0xfdf0, 0xfdfd,
+  0xfe00, 0xfe19,
+  0xfe20, 0xfe23,
+  0xfe30, 0xfe52,
+  0xfe54, 0xfe66,
+  0xfe68, 0xfe6b,
+  0xfe70, 0xfe74,
+  0xfe76, 0xfefc,
+  0xfeff, 0xfeff,
+  0xff01, 0xffbe,
+  0xffc2, 0xffc7,
+  0xffca, 0xffcf,
+  0xffd2, 0xffd7,
+  0xffda, 0xffdc,
+  0xffe0, 0xffe6,
+  0xffe8, 0xffee,
+  0xfff9, 0xfffd,
+  0x10000, 0x1000b,
+  0x1000d, 0x10026,
+  0x10028, 0x1003a,
+  0x1003c, 0x1003d,
+  0x1003f, 0x1004d,
+  0x10050, 0x1005d,
+  0x10080, 0x100fa,
+  0x10100, 0x10102,
+  0x10107, 0x10133,
+  0x10137, 0x1018a,
+  0x10300, 0x1031e,
+  0x10320, 0x10323,
+  0x10330, 0x1034a,
+  0x10380, 0x1039d,
+  0x1039f, 0x103c3,
+  0x103c8, 0x103d5,
+  0x10400, 0x1049d,
+  0x104a0, 0x104a9,
+  0x10800, 0x10805,
+  0x10808, 0x10808,
+  0x1080a, 0x10835,
+  0x10837, 0x10838,
+  0x1083c, 0x1083c,
+  0x1083f, 0x1083f,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a47,
+  0x10a50, 0x10a58,
+  0x1d000, 0x1d0f5,
+  0x1d100, 0x1d126,
+  0x1d12a, 0x1d1dd,
+  0x1d200, 0x1d245,
+  0x1d300, 0x1d356,
+  0x1d400, 0x1d454,
+  0x1d456, 0x1d49c,
+  0x1d49e, 0x1d49f,
+  0x1d4a2, 0x1d4a2,
+  0x1d4a5, 0x1d4a6,
+  0x1d4a9, 0x1d4ac,
+  0x1d4ae, 0x1d4b9,
+  0x1d4bb, 0x1d4bb,
+  0x1d4bd, 0x1d4c3,
+  0x1d4c5, 0x1d505,
+  0x1d507, 0x1d50a,
+  0x1d50d, 0x1d514,
+  0x1d516, 0x1d51c,
+  0x1d51e, 0x1d539,
+  0x1d53b, 0x1d53e,
+  0x1d540, 0x1d544,
+  0x1d546, 0x1d546,
+  0x1d54a, 0x1d550,
+  0x1d552, 0x1d6a5,
+  0x1d6a8, 0x1d7c9,
+  0x1d7ce, 0x1d7ff,
+  0x20000, 0x2a6d6,
+  0x2f800, 0x2fa1d,
+  0xe0001, 0xe0001,
+  0xe0020, 0xe007f,
+  0xe0100, 0xe01ef,
+  0xf0000, 0xffffd,
+  0x100000, 0x10fffd
+}; /* CR_Assigned */
+
+/* 'C': Major Category */
+static const OnigCodePoint CR_C[] = {
+  422,
+  0x0000, 0x001f,
+  0x007f, 0x009f,
+  0x00ad, 0x00ad,
+  0x0242, 0x024f,
+  0x0370, 0x0373,
+  0x0376, 0x0379,
+  0x037b, 0x037d,
+  0x037f, 0x0383,
+  0x038b, 0x038b,
+  0x038d, 0x038d,
+  0x03a2, 0x03a2,
+  0x03cf, 0x03cf,
+  0x0487, 0x0487,
+  0x04cf, 0x04cf,
+  0x04fa, 0x04ff,
+  0x0510, 0x0530,
+  0x0557, 0x0558,
+  0x0560, 0x0560,
+  0x0588, 0x0588,
+  0x058b, 0x0590,
+  0x05ba, 0x05ba,
+  0x05c8, 0x05cf,
+  0x05eb, 0x05ef,
+  0x05f5, 0x060a,
+  0x0616, 0x061a,
+  0x061c, 0x061d,
+  0x0620, 0x0620,
+  0x063b, 0x063f,
+  0x065f, 0x065f,
+  0x06dd, 0x06dd,
+  0x070e, 0x070f,
+  0x074b, 0x074c,
+  0x076e, 0x077f,
+  0x07b2, 0x0900,
+  0x093a, 0x093b,
+  0x094e, 0x094f,
+  0x0955, 0x0957,
+  0x0971, 0x097c,
+  0x097e, 0x0980,
+  0x0984, 0x0984,
+  0x098d, 0x098e,
+  0x0991, 0x0992,
+  0x09a9, 0x09a9,
+  0x09b1, 0x09b1,
+  0x09b3, 0x09b5,
+  0x09ba, 0x09bb,
+  0x09c5, 0x09c6,
+  0x09c9, 0x09ca,
+  0x09cf, 0x09d6,
+  0x09d8, 0x09db,
+  0x09de, 0x09de,
+  0x09e4, 0x09e5,
+  0x09fb, 0x0a00,
+  0x0a04, 0x0a04,
+  0x0a0b, 0x0a0e,
+  0x0a11, 0x0a12,
+  0x0a29, 0x0a29,
+  0x0a31, 0x0a31,
+  0x0a34, 0x0a34,
+  0x0a37, 0x0a37,
+  0x0a3a, 0x0a3b,
+  0x0a3d, 0x0a3d,
+  0x0a43, 0x0a46,
+  0x0a49, 0x0a4a,
+  0x0a4e, 0x0a58,
+  0x0a5d, 0x0a5d,
+  0x0a5f, 0x0a65,
+  0x0a75, 0x0a80,
+  0x0a84, 0x0a84,
+  0x0a8e, 0x0a8e,
+  0x0a92, 0x0a92,
+  0x0aa9, 0x0aa9,
+  0x0ab1, 0x0ab1,
+  0x0ab4, 0x0ab4,
+  0x0aba, 0x0abb,
+  0x0ac6, 0x0ac6,
+  0x0aca, 0x0aca,
+  0x0ace, 0x0acf,
+  0x0ad1, 0x0adf,
+  0x0ae4, 0x0ae5,
+  0x0af0, 0x0af0,
+  0x0af2, 0x0b00,
+  0x0b04, 0x0b04,
+  0x0b0d, 0x0b0e,
+  0x0b11, 0x0b12,
+  0x0b29, 0x0b29,
+  0x0b31, 0x0b31,
+  0x0b34, 0x0b34,
+  0x0b3a, 0x0b3b,
+  0x0b44, 0x0b46,
+  0x0b49, 0x0b4a,
+  0x0b4e, 0x0b55,
+  0x0b58, 0x0b5b,
+  0x0b5e, 0x0b5e,
+  0x0b62, 0x0b65,
+  0x0b72, 0x0b81,
+  0x0b84, 0x0b84,
+  0x0b8b, 0x0b8d,
+  0x0b91, 0x0b91,
+  0x0b96, 0x0b98,
+  0x0b9b, 0x0b9b,
+  0x0b9d, 0x0b9d,
+  0x0ba0, 0x0ba2,
+  0x0ba5, 0x0ba7,
+  0x0bab, 0x0bad,
+  0x0bba, 0x0bbd,
+  0x0bc3, 0x0bc5,
+  0x0bc9, 0x0bc9,
+  0x0bce, 0x0bd6,
+  0x0bd8, 0x0be5,
+  0x0bfb, 0x0c00,
+  0x0c04, 0x0c04,
+  0x0c0d, 0x0c0d,
+  0x0c11, 0x0c11,
+  0x0c29, 0x0c29,
+  0x0c34, 0x0c34,
+  0x0c3a, 0x0c3d,
+  0x0c45, 0x0c45,
+  0x0c49, 0x0c49,
+  0x0c4e, 0x0c54,
+  0x0c57, 0x0c5f,
+  0x0c62, 0x0c65,
+  0x0c70, 0x0c81,
+  0x0c84, 0x0c84,
+  0x0c8d, 0x0c8d,
+  0x0c91, 0x0c91,
+  0x0ca9, 0x0ca9,
+  0x0cb4, 0x0cb4,
+  0x0cba, 0x0cbb,
+  0x0cc5, 0x0cc5,
+  0x0cc9, 0x0cc9,
+  0x0cce, 0x0cd4,
+  0x0cd7, 0x0cdd,
+  0x0cdf, 0x0cdf,
+  0x0ce2, 0x0ce5,
+  0x0cf0, 0x0d01,
+  0x0d04, 0x0d04,
+  0x0d0d, 0x0d0d,
+  0x0d11, 0x0d11,
+  0x0d29, 0x0d29,
+  0x0d3a, 0x0d3d,
+  0x0d44, 0x0d45,
+  0x0d49, 0x0d49,
+  0x0d4e, 0x0d56,
+  0x0d58, 0x0d5f,
+  0x0d62, 0x0d65,
+  0x0d70, 0x0d81,
+  0x0d84, 0x0d84,
+  0x0d97, 0x0d99,
+  0x0db2, 0x0db2,
+  0x0dbc, 0x0dbc,
+  0x0dbe, 0x0dbf,
+  0x0dc7, 0x0dc9,
+  0x0dcb, 0x0dce,
+  0x0dd5, 0x0dd5,
+  0x0dd7, 0x0dd7,
+  0x0de0, 0x0df1,
+  0x0df5, 0x0e00,
+  0x0e3b, 0x0e3e,
+  0x0e5c, 0x0e80,
+  0x0e83, 0x0e83,
+  0x0e85, 0x0e86,
+  0x0e89, 0x0e89,
+  0x0e8b, 0x0e8c,
+  0x0e8e, 0x0e93,
+  0x0e98, 0x0e98,
+  0x0ea0, 0x0ea0,
+  0x0ea4, 0x0ea4,
+  0x0ea6, 0x0ea6,
+  0x0ea8, 0x0ea9,
+  0x0eac, 0x0eac,
+  0x0eba, 0x0eba,
+  0x0ebe, 0x0ebf,
+  0x0ec5, 0x0ec5,
+  0x0ec7, 0x0ec7,
+  0x0ece, 0x0ecf,
+  0x0eda, 0x0edb,
+  0x0ede, 0x0eff,
+  0x0f48, 0x0f48,
+  0x0f6b, 0x0f70,
+  0x0f8c, 0x0f8f,
+  0x0f98, 0x0f98,
+  0x0fbd, 0x0fbd,
+  0x0fcd, 0x0fce,
+  0x0fd2, 0x0fff,
+  0x1022, 0x1022,
+  0x1028, 0x1028,
+  0x102b, 0x102b,
+  0x1033, 0x1035,
+  0x103a, 0x103f,
+  0x105a, 0x109f,
+  0x10c6, 0x10cf,
+  0x10fd, 0x10ff,
+  0x115a, 0x115e,
+  0x11a3, 0x11a7,
+  0x11fa, 0x11ff,
+  0x1249, 0x1249,
+  0x124e, 0x124f,
+  0x1257, 0x1257,
+  0x1259, 0x1259,
+  0x125e, 0x125f,
+  0x1289, 0x1289,
+  0x128e, 0x128f,
+  0x12b1, 0x12b1,
+  0x12b6, 0x12b7,
+  0x12bf, 0x12bf,
+  0x12c1, 0x12c1,
+  0x12c6, 0x12c7,
+  0x12d7, 0x12d7,
+  0x1311, 0x1311,
+  0x1316, 0x1317,
+  0x135b, 0x135e,
+  0x137d, 0x137f,
+  0x139a, 0x139f,
+  0x13f5, 0x1400,
+  0x1677, 0x167f,
+  0x169d, 0x169f,
+  0x16f1, 0x16ff,
+  0x170d, 0x170d,
+  0x1715, 0x171f,
+  0x1737, 0x173f,
+  0x1754, 0x175f,
+  0x176d, 0x176d,
+  0x1771, 0x1771,
+  0x1774, 0x177f,
+  0x17b4, 0x17b5,
+  0x17de, 0x17df,
+  0x17ea, 0x17ef,
+  0x17fa, 0x17ff,
+  0x180f, 0x180f,
+  0x181a, 0x181f,
+  0x1878, 0x187f,
+  0x18aa, 0x18ff,
+  0x191d, 0x191f,
+  0x192c, 0x192f,
+  0x193c, 0x193f,
+  0x1941, 0x1943,
+  0x196e, 0x196f,
+  0x1975, 0x197f,
+  0x19aa, 0x19af,
+  0x19ca, 0x19cf,
+  0x19da, 0x19dd,
+  0x1a1c, 0x1a1d,
+  0x1a20, 0x1cff,
+  0x1dc4, 0x1dff,
+  0x1e9c, 0x1e9f,
+  0x1efa, 0x1eff,
+  0x1f16, 0x1f17,
+  0x1f1e, 0x1f1f,
+  0x1f46, 0x1f47,
+  0x1f4e, 0x1f4f,
+  0x1f58, 0x1f58,
+  0x1f5a, 0x1f5a,
+  0x1f5c, 0x1f5c,
+  0x1f5e, 0x1f5e,
+  0x1f7e, 0x1f7f,
+  0x1fb5, 0x1fb5,
+  0x1fc5, 0x1fc5,
+  0x1fd4, 0x1fd5,
+  0x1fdc, 0x1fdc,
+  0x1ff0, 0x1ff1,
+  0x1ff5, 0x1ff5,
+  0x1fff, 0x1fff,
+  0x200b, 0x200f,
+  0x202a, 0x202e,
+  0x2060, 0x206f,
+  0x2072, 0x2073,
+  0x208f, 0x208f,
+  0x2095, 0x209f,
+  0x20b6, 0x20cf,
+  0x20ec, 0x20ff,
+  0x214d, 0x2152,
+  0x2184, 0x218f,
+  0x23dc, 0x23ff,
+  0x2427, 0x243f,
+  0x244b, 0x245f,
+  0x269d, 0x269f,
+  0x26b2, 0x2700,
+  0x2705, 0x2705,
+  0x270a, 0x270b,
+  0x2728, 0x2728,
+  0x274c, 0x274c,
+  0x274e, 0x274e,
+  0x2753, 0x2755,
+  0x2757, 0x2757,
+  0x275f, 0x2760,
+  0x2795, 0x2797,
+  0x27b0, 0x27b0,
+  0x27bf, 0x27bf,
+  0x27c7, 0x27cf,
+  0x27ec, 0x27ef,
+  0x2b14, 0x2bff,
+  0x2c2f, 0x2c2f,
+  0x2c5f, 0x2c7f,
+  0x2ceb, 0x2cf8,
+  0x2d26, 0x2d2f,
+  0x2d66, 0x2d6e,
+  0x2d70, 0x2d7f,
+  0x2d97, 0x2d9f,
+  0x2da7, 0x2da7,
+  0x2daf, 0x2daf,
+  0x2db7, 0x2db7,
+  0x2dbf, 0x2dbf,
+  0x2dc7, 0x2dc7,
+  0x2dcf, 0x2dcf,
+  0x2dd7, 0x2dd7,
+  0x2ddf, 0x2dff,
+  0x2e18, 0x2e1b,
+  0x2e1e, 0x2e7f,
+  0x2e9a, 0x2e9a,
+  0x2ef4, 0x2eff,
+  0x2fd6, 0x2fef,
+  0x2ffc, 0x2fff,
+  0x3040, 0x3040,
+  0x3097, 0x3098,
+  0x3100, 0x3104,
+  0x312d, 0x3130,
+  0x318f, 0x318f,
+  0x31b8, 0x31bf,
+  0x31d0, 0x31ef,
+  0x321f, 0x321f,
+  0x3244, 0x324f,
+  0x32ff, 0x32ff,
+  0x4db6, 0x4dbf,
+  0x9fbc, 0x9fff,
+  0xa48d, 0xa48f,
+  0xa4c7, 0xa6ff,
+  0xa717, 0xa7ff,
+  0xa82c, 0xabff,
+  0xd7a4, 0xf8ff,
+  0xfa2e, 0xfa2f,
+  0xfa6b, 0xfa6f,
+  0xfada, 0xfaff,
+  0xfb07, 0xfb12,
+  0xfb18, 0xfb1c,
+  0xfb37, 0xfb37,
+  0xfb3d, 0xfb3d,
+  0xfb3f, 0xfb3f,
+  0xfb42, 0xfb42,
+  0xfb45, 0xfb45,
+  0xfbb2, 0xfbd2,
+  0xfd40, 0xfd4f,
+  0xfd90, 0xfd91,
+  0xfdc8, 0xfdef,
+  0xfdfe, 0xfdff,
+  0xfe1a, 0xfe1f,
+  0xfe24, 0xfe2f,
+  0xfe53, 0xfe53,
+  0xfe67, 0xfe67,
+  0xfe6c, 0xfe6f,
+  0xfe75, 0xfe75,
+  0xfefd, 0xff00,
+  0xffbf, 0xffc1,
+  0xffc8, 0xffc9,
+  0xffd0, 0xffd1,
+  0xffd8, 0xffd9,
+  0xffdd, 0xffdf,
+  0xffe7, 0xffe7,
+  0xffef, 0xfffb,
+  0xfffe, 0xffff,
+  0x1000c, 0x1000c,
+  0x10027, 0x10027,
+  0x1003b, 0x1003b,
+  0x1003e, 0x1003e,
+  0x1004e, 0x1004f,
+  0x1005e, 0x1007f,
+  0x100fb, 0x100ff,
+  0x10103, 0x10106,
+  0x10134, 0x10136,
+  0x1018b, 0x102ff,
+  0x1031f, 0x1031f,
+  0x10324, 0x1032f,
+  0x1034b, 0x1037f,
+  0x1039e, 0x1039e,
+  0x103c4, 0x103c7,
+  0x103d6, 0x103ff,
+  0x1049e, 0x1049f,
+  0x104aa, 0x107ff,
+  0x10806, 0x10807,
+  0x10809, 0x10809,
+  0x10836, 0x10836,
+  0x10839, 0x1083b,
+  0x1083d, 0x1083e,
+  0x10840, 0x109ff,
+  0x10a04, 0x10a04,
+  0x10a07, 0x10a0b,
+  0x10a14, 0x10a14,
+  0x10a18, 0x10a18,
+  0x10a34, 0x10a37,
+  0x10a3b, 0x10a3e,
+  0x10a48, 0x10a4f,
+  0x10a59, 0x1cfff,
+  0x1d0f6, 0x1d0ff,
+  0x1d127, 0x1d129,
+  0x1d173, 0x1d17a,
+  0x1d1de, 0x1d1ff,
+  0x1d246, 0x1d2ff,
+  0x1d357, 0x1d3ff,
+  0x1d455, 0x1d455,
+  0x1d49d, 0x1d49d,
+  0x1d4a0, 0x1d4a1,
+  0x1d4a3, 0x1d4a4,
+  0x1d4a7, 0x1d4a8,
+  0x1d4ad, 0x1d4ad,
+  0x1d4ba, 0x1d4ba,
+  0x1d4bc, 0x1d4bc,
+  0x1d4c4, 0x1d4c4,
+  0x1d506, 0x1d506,
+  0x1d50b, 0x1d50c,
+  0x1d515, 0x1d515,
+  0x1d51d, 0x1d51d,
+  0x1d53a, 0x1d53a,
+  0x1d53f, 0x1d53f,
+  0x1d545, 0x1d545,
+  0x1d547, 0x1d549,
+  0x1d551, 0x1d551,
+  0x1d6a6, 0x1d6a7,
+  0x1d7ca, 0x1d7cd,
+  0x1d800, 0x1ffff,
+  0x2a6d7, 0x2f7ff,
+  0x2fa1e, 0xe00ff,
+  0xe01f0, 0x10ffff
+}; /* CR_C */
+
+/* 'Cc': General Category */
+static const OnigCodePoint CR_Cc[] = {
+  2,
+  0x0000, 0x001f,
+  0x007f, 0x009f
+}; /* CR_Cc */
+
+/* 'Cf': General Category */
+static const OnigCodePoint CR_Cf[] = {
+  14,
+  0x00ad, 0x00ad,
+  0x0600, 0x0603,
+  0x06dd, 0x06dd,
+  0x070f, 0x070f,
+  0x17b4, 0x17b5,
+  0x200b, 0x200f,
+  0x202a, 0x202e,
+  0x2060, 0x2063,
+  0x206a, 0x206f,
+  0xfeff, 0xfeff,
+  0xfff9, 0xfffb,
+  0x1d173, 0x1d17a,
+  0xe0001, 0xe0001,
+  0xe0020, 0xe007f
+}; /* CR_Cf */
+
+/* 'Cn': General Category */
+static const OnigCodePoint CR_Cn[] = {
+  420,
+  0x0242, 0x024f,
+  0x0370, 0x0373,
+  0x0376, 0x0379,
+  0x037b, 0x037d,
+  0x037f, 0x0383,
+  0x038b, 0x038b,
+  0x038d, 0x038d,
+  0x03a2, 0x03a2,
+  0x03cf, 0x03cf,
+  0x0487, 0x0487,
+  0x04cf, 0x04cf,
+  0x04fa, 0x04ff,
+  0x0510, 0x0530,
+  0x0557, 0x0558,
+  0x0560, 0x0560,
+  0x0588, 0x0588,
+  0x058b, 0x0590,
+  0x05ba, 0x05ba,
+  0x05c8, 0x05cf,
+  0x05eb, 0x05ef,
+  0x05f5, 0x05ff,
+  0x0604, 0x060a,
+  0x0616, 0x061a,
+  0x061c, 0x061d,
+  0x0620, 0x0620,
+  0x063b, 0x063f,
+  0x065f, 0x065f,
+  0x070e, 0x070e,
+  0x074b, 0x074c,
+  0x076e, 0x077f,
+  0x07b2, 0x0900,
+  0x093a, 0x093b,
+  0x094e, 0x094f,
+  0x0955, 0x0957,
+  0x0971, 0x097c,
+  0x097e, 0x0980,
+  0x0984, 0x0984,
+  0x098d, 0x098e,
+  0x0991, 0x0992,
+  0x09a9, 0x09a9,
+  0x09b1, 0x09b1,
+  0x09b3, 0x09b5,
+  0x09ba, 0x09bb,
+  0x09c5, 0x09c6,
+  0x09c9, 0x09ca,
+  0x09cf, 0x09d6,
+  0x09d8, 0x09db,
+  0x09de, 0x09de,
+  0x09e4, 0x09e5,
+  0x09fb, 0x0a00,
+  0x0a04, 0x0a04,
+  0x0a0b, 0x0a0e,
+  0x0a11, 0x0a12,
+  0x0a29, 0x0a29,
+  0x0a31, 0x0a31,
+  0x0a34, 0x0a34,
+  0x0a37, 0x0a37,
+  0x0a3a, 0x0a3b,
+  0x0a3d, 0x0a3d,
+  0x0a43, 0x0a46,
+  0x0a49, 0x0a4a,
+  0x0a4e, 0x0a58,
+  0x0a5d, 0x0a5d,
+  0x0a5f, 0x0a65,
+  0x0a75, 0x0a80,
+  0x0a84, 0x0a84,
+  0x0a8e, 0x0a8e,
+  0x0a92, 0x0a92,
+  0x0aa9, 0x0aa9,
+  0x0ab1, 0x0ab1,
+  0x0ab4, 0x0ab4,
+  0x0aba, 0x0abb,
+  0x0ac6, 0x0ac6,
+  0x0aca, 0x0aca,
+  0x0ace, 0x0acf,
+  0x0ad1, 0x0adf,
+  0x0ae4, 0x0ae5,
+  0x0af0, 0x0af0,
+  0x0af2, 0x0b00,
+  0x0b04, 0x0b04,
+  0x0b0d, 0x0b0e,
+  0x0b11, 0x0b12,
+  0x0b29, 0x0b29,
+  0x0b31, 0x0b31,
+  0x0b34, 0x0b34,
+  0x0b3a, 0x0b3b,
+  0x0b44, 0x0b46,
+  0x0b49, 0x0b4a,
+  0x0b4e, 0x0b55,
+  0x0b58, 0x0b5b,
+  0x0b5e, 0x0b5e,
+  0x0b62, 0x0b65,
+  0x0b72, 0x0b81,
+  0x0b84, 0x0b84,
+  0x0b8b, 0x0b8d,
+  0x0b91, 0x0b91,
+  0x0b96, 0x0b98,
+  0x0b9b, 0x0b9b,
+  0x0b9d, 0x0b9d,
+  0x0ba0, 0x0ba2,
+  0x0ba5, 0x0ba7,
+  0x0bab, 0x0bad,
+  0x0bba, 0x0bbd,
+  0x0bc3, 0x0bc5,
+  0x0bc9, 0x0bc9,
+  0x0bce, 0x0bd6,
+  0x0bd8, 0x0be5,
+  0x0bfb, 0x0c00,
+  0x0c04, 0x0c04,
+  0x0c0d, 0x0c0d,
+  0x0c11, 0x0c11,
+  0x0c29, 0x0c29,
+  0x0c34, 0x0c34,
+  0x0c3a, 0x0c3d,
+  0x0c45, 0x0c45,
+  0x0c49, 0x0c49,
+  0x0c4e, 0x0c54,
+  0x0c57, 0x0c5f,
+  0x0c62, 0x0c65,
+  0x0c70, 0x0c81,
+  0x0c84, 0x0c84,
+  0x0c8d, 0x0c8d,
+  0x0c91, 0x0c91,
+  0x0ca9, 0x0ca9,
+  0x0cb4, 0x0cb4,
+  0x0cba, 0x0cbb,
+  0x0cc5, 0x0cc5,
+  0x0cc9, 0x0cc9,
+  0x0cce, 0x0cd4,
+  0x0cd7, 0x0cdd,
+  0x0cdf, 0x0cdf,
+  0x0ce2, 0x0ce5,
+  0x0cf0, 0x0d01,
+  0x0d04, 0x0d04,
+  0x0d0d, 0x0d0d,
+  0x0d11, 0x0d11,
+  0x0d29, 0x0d29,
+  0x0d3a, 0x0d3d,
+  0x0d44, 0x0d45,
+  0x0d49, 0x0d49,
+  0x0d4e, 0x0d56,
+  0x0d58, 0x0d5f,
+  0x0d62, 0x0d65,
+  0x0d70, 0x0d81,
+  0x0d84, 0x0d84,
+  0x0d97, 0x0d99,
+  0x0db2, 0x0db2,
+  0x0dbc, 0x0dbc,
+  0x0dbe, 0x0dbf,
+  0x0dc7, 0x0dc9,
+  0x0dcb, 0x0dce,
+  0x0dd5, 0x0dd5,
+  0x0dd7, 0x0dd7,
+  0x0de0, 0x0df1,
+  0x0df5, 0x0e00,
+  0x0e3b, 0x0e3e,
+  0x0e5c, 0x0e80,
+  0x0e83, 0x0e83,
+  0x0e85, 0x0e86,
+  0x0e89, 0x0e89,
+  0x0e8b, 0x0e8c,
+  0x0e8e, 0x0e93,
+  0x0e98, 0x0e98,
+  0x0ea0, 0x0ea0,
+  0x0ea4, 0x0ea4,
+  0x0ea6, 0x0ea6,
+  0x0ea8, 0x0ea9,
+  0x0eac, 0x0eac,
+  0x0eba, 0x0eba,
+  0x0ebe, 0x0ebf,
+  0x0ec5, 0x0ec5,
+  0x0ec7, 0x0ec7,
+  0x0ece, 0x0ecf,
+  0x0eda, 0x0edb,
+  0x0ede, 0x0eff,
+  0x0f48, 0x0f48,
+  0x0f6b, 0x0f70,
+  0x0f8c, 0x0f8f,
+  0x0f98, 0x0f98,
+  0x0fbd, 0x0fbd,
+  0x0fcd, 0x0fce,
+  0x0fd2, 0x0fff,
+  0x1022, 0x1022,
+  0x1028, 0x1028,
+  0x102b, 0x102b,
+  0x1033, 0x1035,
+  0x103a, 0x103f,
+  0x105a, 0x109f,
+  0x10c6, 0x10cf,
+  0x10fd, 0x10ff,
+  0x115a, 0x115e,
+  0x11a3, 0x11a7,
+  0x11fa, 0x11ff,
+  0x1249, 0x1249,
+  0x124e, 0x124f,
+  0x1257, 0x1257,
+  0x1259, 0x1259,
+  0x125e, 0x125f,
+  0x1289, 0x1289,
+  0x128e, 0x128f,
+  0x12b1, 0x12b1,
+  0x12b6, 0x12b7,
+  0x12bf, 0x12bf,
+  0x12c1, 0x12c1,
+  0x12c6, 0x12c7,
+  0x12d7, 0x12d7,
+  0x1311, 0x1311,
+  0x1316, 0x1317,
+  0x135b, 0x135e,
+  0x137d, 0x137f,
+  0x139a, 0x139f,
+  0x13f5, 0x1400,
+  0x1677, 0x167f,
+  0x169d, 0x169f,
+  0x16f1, 0x16ff,
+  0x170d, 0x170d,
+  0x1715, 0x171f,
+  0x1737, 0x173f,
+  0x1754, 0x175f,
+  0x176d, 0x176d,
+  0x1771, 0x1771,
+  0x1774, 0x177f,
+  0x17de, 0x17df,
+  0x17ea, 0x17ef,
+  0x17fa, 0x17ff,
+  0x180f, 0x180f,
+  0x181a, 0x181f,
+  0x1878, 0x187f,
+  0x18aa, 0x18ff,
+  0x191d, 0x191f,
+  0x192c, 0x192f,
+  0x193c, 0x193f,
+  0x1941, 0x1943,
+  0x196e, 0x196f,
+  0x1975, 0x197f,
+  0x19aa, 0x19af,
+  0x19ca, 0x19cf,
+  0x19da, 0x19dd,
+  0x1a1c, 0x1a1d,
+  0x1a20, 0x1cff,
+  0x1dc4, 0x1dff,
+  0x1e9c, 0x1e9f,
+  0x1efa, 0x1eff,
+  0x1f16, 0x1f17,
+  0x1f1e, 0x1f1f,
+  0x1f46, 0x1f47,
+  0x1f4e, 0x1f4f,
+  0x1f58, 0x1f58,
+  0x1f5a, 0x1f5a,
+  0x1f5c, 0x1f5c,
+  0x1f5e, 0x1f5e,
+  0x1f7e, 0x1f7f,
+  0x1fb5, 0x1fb5,
+  0x1fc5, 0x1fc5,
+  0x1fd4, 0x1fd5,
+  0x1fdc, 0x1fdc,
+  0x1ff0, 0x1ff1,
+  0x1ff5, 0x1ff5,
+  0x1fff, 0x1fff,
+  0x2064, 0x2069,
+  0x2072, 0x2073,
+  0x208f, 0x208f,
+  0x2095, 0x209f,
+  0x20b6, 0x20cf,
+  0x20ec, 0x20ff,
+  0x214d, 0x2152,
+  0x2184, 0x218f,
+  0x23dc, 0x23ff,
+  0x2427, 0x243f,
+  0x244b, 0x245f,
+  0x269d, 0x269f,
+  0x26b2, 0x2700,
+  0x2705, 0x2705,
+  0x270a, 0x270b,
+  0x2728, 0x2728,
+  0x274c, 0x274c,
+  0x274e, 0x274e,
+  0x2753, 0x2755,
+  0x2757, 0x2757,
+  0x275f, 0x2760,
+  0x2795, 0x2797,
+  0x27b0, 0x27b0,
+  0x27bf, 0x27bf,
+  0x27c7, 0x27cf,
+  0x27ec, 0x27ef,
+  0x2b14, 0x2bff,
+  0x2c2f, 0x2c2f,
+  0x2c5f, 0x2c7f,
+  0x2ceb, 0x2cf8,
+  0x2d26, 0x2d2f,
+  0x2d66, 0x2d6e,
+  0x2d70, 0x2d7f,
+  0x2d97, 0x2d9f,
+  0x2da7, 0x2da7,
+  0x2daf, 0x2daf,
+  0x2db7, 0x2db7,
+  0x2dbf, 0x2dbf,
+  0x2dc7, 0x2dc7,
+  0x2dcf, 0x2dcf,
+  0x2dd7, 0x2dd7,
+  0x2ddf, 0x2dff,
+  0x2e18, 0x2e1b,
+  0x2e1e, 0x2e7f,
+  0x2e9a, 0x2e9a,
+  0x2ef4, 0x2eff,
+  0x2fd6, 0x2fef,
+  0x2ffc, 0x2fff,
+  0x3040, 0x3040,
+  0x3097, 0x3098,
+  0x3100, 0x3104,
+  0x312d, 0x3130,
+  0x318f, 0x318f,
+  0x31b8, 0x31bf,
+  0x31d0, 0x31ef,
+  0x321f, 0x321f,
+  0x3244, 0x324f,
+  0x32ff, 0x32ff,
+  0x4db6, 0x4dbf,
+  0x9fbc, 0x9fff,
+  0xa48d, 0xa48f,
+  0xa4c7, 0xa6ff,
+  0xa717, 0xa7ff,
+  0xa82c, 0xabff,
+  0xd7a4, 0xd7ff,
+  0xfa2e, 0xfa2f,
+  0xfa6b, 0xfa6f,
+  0xfada, 0xfaff,
+  0xfb07, 0xfb12,
+  0xfb18, 0xfb1c,
+  0xfb37, 0xfb37,
+  0xfb3d, 0xfb3d,
+  0xfb3f, 0xfb3f,
+  0xfb42, 0xfb42,
+  0xfb45, 0xfb45,
+  0xfbb2, 0xfbd2,
+  0xfd40, 0xfd4f,
+  0xfd90, 0xfd91,
+  0xfdc8, 0xfdef,
+  0xfdfe, 0xfdff,
+  0xfe1a, 0xfe1f,
+  0xfe24, 0xfe2f,
+  0xfe53, 0xfe53,
+  0xfe67, 0xfe67,
+  0xfe6c, 0xfe6f,
+  0xfe75, 0xfe75,
+  0xfefd, 0xfefe,
+  0xff00, 0xff00,
+  0xffbf, 0xffc1,
+  0xffc8, 0xffc9,
+  0xffd0, 0xffd1,
+  0xffd8, 0xffd9,
+  0xffdd, 0xffdf,
+  0xffe7, 0xffe7,
+  0xffef, 0xfff8,
+  0xfffe, 0xffff,
+  0x1000c, 0x1000c,
+  0x10027, 0x10027,
+  0x1003b, 0x1003b,
+  0x1003e, 0x1003e,
+  0x1004e, 0x1004f,
+  0x1005e, 0x1007f,
+  0x100fb, 0x100ff,
+  0x10103, 0x10106,
+  0x10134, 0x10136,
+  0x1018b, 0x102ff,
+  0x1031f, 0x1031f,
+  0x10324, 0x1032f,
+  0x1034b, 0x1037f,
+  0x1039e, 0x1039e,
+  0x103c4, 0x103c7,
+  0x103d6, 0x103ff,
+  0x1049e, 0x1049f,
+  0x104aa, 0x107ff,
+  0x10806, 0x10807,
+  0x10809, 0x10809,
+  0x10836, 0x10836,
+  0x10839, 0x1083b,
+  0x1083d, 0x1083e,
+  0x10840, 0x109ff,
+  0x10a04, 0x10a04,
+  0x10a07, 0x10a0b,
+  0x10a14, 0x10a14,
+  0x10a18, 0x10a18,
+  0x10a34, 0x10a37,
+  0x10a3b, 0x10a3e,
+  0x10a48, 0x10a4f,
+  0x10a59, 0x1cfff,
+  0x1d0f6, 0x1d0ff,
+  0x1d127, 0x1d129,
+  0x1d1de, 0x1d1ff,
+  0x1d246, 0x1d2ff,
+  0x1d357, 0x1d3ff,
+  0x1d455, 0x1d455,
+  0x1d49d, 0x1d49d,
+  0x1d4a0, 0x1d4a1,
+  0x1d4a3, 0x1d4a4,
+  0x1d4a7, 0x1d4a8,
+  0x1d4ad, 0x1d4ad,
+  0x1d4ba, 0x1d4ba,
+  0x1d4bc, 0x1d4bc,
+  0x1d4c4, 0x1d4c4,
+  0x1d506, 0x1d506,
+  0x1d50b, 0x1d50c,
+  0x1d515, 0x1d515,
+  0x1d51d, 0x1d51d,
+  0x1d53a, 0x1d53a,
+  0x1d53f, 0x1d53f,
+  0x1d545, 0x1d545,
+  0x1d547, 0x1d549,
+  0x1d551, 0x1d551,
+  0x1d6a6, 0x1d6a7,
+  0x1d7ca, 0x1d7cd,
+  0x1d800, 0x1ffff,
+  0x2a6d7, 0x2f7ff,
+  0x2fa1e, 0xe0000,
+  0xe0002, 0xe001f,
+  0xe0080, 0xe00ff,
+  0xe01f0, 0xeffff,
+  0xffffe, 0xfffff,
+  0x10fffe, 0x10ffff
+}; /* CR_Cn */
+
+/* 'Co': General Category */
+static const OnigCodePoint CR_Co[] = {
+  3,
+  0xe000, 0xf8ff,
+  0xf0000, 0xffffd,
+  0x100000, 0x10fffd
+}; /* CR_Co */
+
+/* 'Cs': General Category */
+static const OnigCodePoint CR_Cs[] = {
+  1,
+  0xd800, 0xdfff
+}; /* CR_Cs */
+
+/* 'L': Major Category */
+static const OnigCodePoint CR_L[] = {
+  347,
+  0x0041, 0x005a,
+  0x0061, 0x007a,
+  0x00aa, 0x00aa,
+  0x00b5, 0x00b5,
+  0x00ba, 0x00ba,
+  0x00c0, 0x00d6,
+  0x00d8, 0x00f6,
+  0x00f8, 0x0241,
+  0x0250, 0x02c1,
+  0x02c6, 0x02d1,
+  0x02e0, 0x02e4,
+  0x02ee, 0x02ee,
+  0x037a, 0x037a,
+  0x0386, 0x0386,
+  0x0388, 0x038a,
+  0x038c, 0x038c,
+  0x038e, 0x03a1,
+  0x03a3, 0x03ce,
+  0x03d0, 0x03f5,
+  0x03f7, 0x0481,
+  0x048a, 0x04ce,
+  0x04d0, 0x04f9,
+  0x0500, 0x050f,
+  0x0531, 0x0556,
+  0x0559, 0x0559,
+  0x0561, 0x0587,
+  0x05d0, 0x05ea,
+  0x05f0, 0x05f2,
+  0x0621, 0x063a,
+  0x0640, 0x064a,
+  0x066e, 0x066f,
+  0x0671, 0x06d3,
+  0x06d5, 0x06d5,
+  0x06e5, 0x06e6,
+  0x06ee, 0x06ef,
+  0x06fa, 0x06fc,
+  0x06ff, 0x06ff,
+  0x0710, 0x0710,
+  0x0712, 0x072f,
+  0x074d, 0x076d,
+  0x0780, 0x07a5,
+  0x07b1, 0x07b1,
+  0x0904, 0x0939,
+  0x093d, 0x093d,
+  0x0950, 0x0950,
+  0x0958, 0x0961,
+  0x097d, 0x097d,
+  0x0985, 0x098c,
+  0x098f, 0x0990,
+  0x0993, 0x09a8,
+  0x09aa, 0x09b0,
+  0x09b2, 0x09b2,
+  0x09b6, 0x09b9,
+  0x09bd, 0x09bd,
+  0x09ce, 0x09ce,
+  0x09dc, 0x09dd,
+  0x09df, 0x09e1,
+  0x09f0, 0x09f1,
+  0x0a05, 0x0a0a,
+  0x0a0f, 0x0a10,
+  0x0a13, 0x0a28,
+  0x0a2a, 0x0a30,
+  0x0a32, 0x0a33,
+  0x0a35, 0x0a36,
+  0x0a38, 0x0a39,
+  0x0a59, 0x0a5c,
+  0x0a5e, 0x0a5e,
+  0x0a72, 0x0a74,
+  0x0a85, 0x0a8d,
+  0x0a8f, 0x0a91,
+  0x0a93, 0x0aa8,
+  0x0aaa, 0x0ab0,
+  0x0ab2, 0x0ab3,
+  0x0ab5, 0x0ab9,
+  0x0abd, 0x0abd,
+  0x0ad0, 0x0ad0,
+  0x0ae0, 0x0ae1,
+  0x0b05, 0x0b0c,
+  0x0b0f, 0x0b10,
+  0x0b13, 0x0b28,
+  0x0b2a, 0x0b30,
+  0x0b32, 0x0b33,
+  0x0b35, 0x0b39,
+  0x0b3d, 0x0b3d,
+  0x0b5c, 0x0b5d,
+  0x0b5f, 0x0b61,
+  0x0b71, 0x0b71,
+  0x0b83, 0x0b83,
+  0x0b85, 0x0b8a,
+  0x0b8e, 0x0b90,
+  0x0b92, 0x0b95,
+  0x0b99, 0x0b9a,
+  0x0b9c, 0x0b9c,
+  0x0b9e, 0x0b9f,
+  0x0ba3, 0x0ba4,
+  0x0ba8, 0x0baa,
+  0x0bae, 0x0bb9,
+  0x0c05, 0x0c0c,
+  0x0c0e, 0x0c10,
+  0x0c12, 0x0c28,
+  0x0c2a, 0x0c33,
+  0x0c35, 0x0c39,
+  0x0c60, 0x0c61,
+  0x0c85, 0x0c8c,
+  0x0c8e, 0x0c90,
+  0x0c92, 0x0ca8,
+  0x0caa, 0x0cb3,
+  0x0cb5, 0x0cb9,
+  0x0cbd, 0x0cbd,
+  0x0cde, 0x0cde,
+  0x0ce0, 0x0ce1,
+  0x0d05, 0x0d0c,
+  0x0d0e, 0x0d10,
+  0x0d12, 0x0d28,
+  0x0d2a, 0x0d39,
+  0x0d60, 0x0d61,
+  0x0d85, 0x0d96,
+  0x0d9a, 0x0db1,
+  0x0db3, 0x0dbb,
+  0x0dbd, 0x0dbd,
+  0x0dc0, 0x0dc6,
+  0x0e01, 0x0e30,
+  0x0e32, 0x0e33,
+  0x0e40, 0x0e46,
+  0x0e81, 0x0e82,
+  0x0e84, 0x0e84,
+  0x0e87, 0x0e88,
+  0x0e8a, 0x0e8a,
+  0x0e8d, 0x0e8d,
+  0x0e94, 0x0e97,
+  0x0e99, 0x0e9f,
+  0x0ea1, 0x0ea3,
+  0x0ea5, 0x0ea5,
+  0x0ea7, 0x0ea7,
+  0x0eaa, 0x0eab,
+  0x0ead, 0x0eb0,
+  0x0eb2, 0x0eb3,
+  0x0ebd, 0x0ebd,
+  0x0ec0, 0x0ec4,
+  0x0ec6, 0x0ec6,
+  0x0edc, 0x0edd,
+  0x0f00, 0x0f00,
+  0x0f40, 0x0f47,
+  0x0f49, 0x0f6a,
+  0x0f88, 0x0f8b,
+  0x1000, 0x1021,
+  0x1023, 0x1027,
+  0x1029, 0x102a,
+  0x1050, 0x1055,
+  0x10a0, 0x10c5,
+  0x10d0, 0x10fa,
+  0x10fc, 0x10fc,
+  0x1100, 0x1159,
+  0x115f, 0x11a2,
+  0x11a8, 0x11f9,
+  0x1200, 0x1248,
+  0x124a, 0x124d,
+  0x1250, 0x1256,
+  0x1258, 0x1258,
+  0x125a, 0x125d,
+  0x1260, 0x1288,
+  0x128a, 0x128d,
+  0x1290, 0x12b0,
+  0x12b2, 0x12b5,
+  0x12b8, 0x12be,
+  0x12c0, 0x12c0,
+  0x12c2, 0x12c5,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
+  0x1312, 0x1315,
+  0x1318, 0x135a,
+  0x1380, 0x138f,
+  0x13a0, 0x13f4,
+  0x1401, 0x166c,
+  0x166f, 0x1676,
+  0x1681, 0x169a,
+  0x16a0, 0x16ea,
+  0x1700, 0x170c,
+  0x170e, 0x1711,
+  0x1720, 0x1731,
+  0x1740, 0x1751,
+  0x1760, 0x176c,
+  0x176e, 0x1770,
+  0x1780, 0x17b3,
+  0x17d7, 0x17d7,
+  0x17dc, 0x17dc,
+  0x1820, 0x1877,
+  0x1880, 0x18a8,
+  0x1900, 0x191c,
+  0x1950, 0x196d,
+  0x1970, 0x1974,
+  0x1980, 0x19a9,
+  0x19c1, 0x19c7,
+  0x1a00, 0x1a16,
+  0x1d00, 0x1dbf,
+  0x1e00, 0x1e9b,
+  0x1ea0, 0x1ef9,
+  0x1f00, 0x1f15,
+  0x1f18, 0x1f1d,
+  0x1f20, 0x1f45,
+  0x1f48, 0x1f4d,
+  0x1f50, 0x1f57,
+  0x1f59, 0x1f59,
+  0x1f5b, 0x1f5b,
+  0x1f5d, 0x1f5d,
+  0x1f5f, 0x1f7d,
+  0x1f80, 0x1fb4,
+  0x1fb6, 0x1fbc,
+  0x1fbe, 0x1fbe,
+  0x1fc2, 0x1fc4,
+  0x1fc6, 0x1fcc,
+  0x1fd0, 0x1fd3,
+  0x1fd6, 0x1fdb,
+  0x1fe0, 0x1fec,
+  0x1ff2, 0x1ff4,
+  0x1ff6, 0x1ffc,
+  0x2071, 0x2071,
+  0x207f, 0x207f,
+  0x2090, 0x2094,
+  0x2102, 0x2102,
+  0x2107, 0x2107,
+  0x210a, 0x2113,
+  0x2115, 0x2115,
+  0x2119, 0x211d,
+  0x2124, 0x2124,
+  0x2126, 0x2126,
+  0x2128, 0x2128,
+  0x212a, 0x212d,
+  0x212f, 0x2131,
+  0x2133, 0x2139,
+  0x213c, 0x213f,
+  0x2145, 0x2149,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e,
+  0x2c80, 0x2ce4,
+  0x2d00, 0x2d25,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
+  0x3005, 0x3006,
+  0x3031, 0x3035,
+  0x303b, 0x303c,
+  0x3041, 0x3096,
+  0x309d, 0x309f,
+  0x30a1, 0x30fa,
+  0x30fc, 0x30ff,
+  0x3105, 0x312c,
+  0x3131, 0x318e,
+  0x31a0, 0x31b7,
+  0x31f0, 0x31ff,
+  0x3400, 0x4db5,
+  0x4e00, 0x9fbb,
+  0xa000, 0xa48c,
+  0xa800, 0xa801,
+  0xa803, 0xa805,
+  0xa807, 0xa80a,
+  0xa80c, 0xa822,
+  0xac00, 0xd7a3,
+  0xf900, 0xfa2d,
+  0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
+  0xfb00, 0xfb06,
+  0xfb13, 0xfb17,
+  0xfb1d, 0xfb1d,
+  0xfb1f, 0xfb28,
+  0xfb2a, 0xfb36,
+  0xfb38, 0xfb3c,
+  0xfb3e, 0xfb3e,
+  0xfb40, 0xfb41,
+  0xfb43, 0xfb44,
+  0xfb46, 0xfbb1,
+  0xfbd3, 0xfd3d,
+  0xfd50, 0xfd8f,
+  0xfd92, 0xfdc7,
+  0xfdf0, 0xfdfb,
+  0xfe70, 0xfe74,
+  0xfe76, 0xfefc,
+  0xff21, 0xff3a,
+  0xff41, 0xff5a,
+  0xff66, 0xffbe,
+  0xffc2, 0xffc7,
+  0xffca, 0xffcf,
+  0xffd2, 0xffd7,
+  0xffda, 0xffdc,
+  0x10000, 0x1000b,
+  0x1000d, 0x10026,
+  0x10028, 0x1003a,
+  0x1003c, 0x1003d,
+  0x1003f, 0x1004d,
+  0x10050, 0x1005d,
+  0x10080, 0x100fa,
+  0x10300, 0x1031e,
+  0x10330, 0x10349,
+  0x10380, 0x1039d,
+  0x103a0, 0x103c3,
+  0x103c8, 0x103cf,
+  0x10400, 0x1049d,
+  0x10800, 0x10805,
+  0x10808, 0x10808,
+  0x1080a, 0x10835,
+  0x10837, 0x10838,
+  0x1083c, 0x1083c,
+  0x1083f, 0x1083f,
+  0x10a00, 0x10a00,
+  0x10a10, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x1d400, 0x1d454,
+  0x1d456, 0x1d49c,
+  0x1d49e, 0x1d49f,
+  0x1d4a2, 0x1d4a2,
+  0x1d4a5, 0x1d4a6,
+  0x1d4a9, 0x1d4ac,
+  0x1d4ae, 0x1d4b9,
+  0x1d4bb, 0x1d4bb,
+  0x1d4bd, 0x1d4c3,
+  0x1d4c5, 0x1d505,
+  0x1d507, 0x1d50a,
+  0x1d50d, 0x1d514,
+  0x1d516, 0x1d51c,
+  0x1d51e, 0x1d539,
+  0x1d53b, 0x1d53e,
+  0x1d540, 0x1d544,
+  0x1d546, 0x1d546,
+  0x1d54a, 0x1d550,
+  0x1d552, 0x1d6a5,
+  0x1d6a8, 0x1d6c0,
+  0x1d6c2, 0x1d6da,
+  0x1d6dc, 0x1d6fa,
+  0x1d6fc, 0x1d714,
+  0x1d716, 0x1d734,
+  0x1d736, 0x1d74e,
+  0x1d750, 0x1d76e,
+  0x1d770, 0x1d788,
+  0x1d78a, 0x1d7a8,
+  0x1d7aa, 0x1d7c2,
+  0x1d7c4, 0x1d7c9,
+  0x20000, 0x2a6d6,
+  0x2f800, 0x2fa1d
+}; /* CR_L */
+
+/* 'Ll': General Category */
+static const OnigCodePoint CR_Ll[] = {
+  480,
+  0x0061, 0x007a,
+  0x00aa, 0x00aa,
+  0x00b5, 0x00b5,
+  0x00ba, 0x00ba,
+  0x00df, 0x00f6,
+  0x00f8, 0x00ff,
+  0x0101, 0x0101,
+  0x0103, 0x0103,
+  0x0105, 0x0105,
+  0x0107, 0x0107,
+  0x0109, 0x0109,
+  0x010b, 0x010b,
+  0x010d, 0x010d,
+  0x010f, 0x010f,
+  0x0111, 0x0111,
+  0x0113, 0x0113,
+  0x0115, 0x0115,
+  0x0117, 0x0117,
+  0x0119, 0x0119,
+  0x011b, 0x011b,
+  0x011d, 0x011d,
+  0x011f, 0x011f,
+  0x0121, 0x0121,
+  0x0123, 0x0123,
+  0x0125, 0x0125,
+  0x0127, 0x0127,
+  0x0129, 0x0129,
+  0x012b, 0x012b,
+  0x012d, 0x012d,
+  0x012f, 0x012f,
+  0x0131, 0x0131,
+  0x0133, 0x0133,
+  0x0135, 0x0135,
+  0x0137, 0x0138,
+  0x013a, 0x013a,
+  0x013c, 0x013c,
+  0x013e, 0x013e,
+  0x0140, 0x0140,
+  0x0142, 0x0142,
+  0x0144, 0x0144,
+  0x0146, 0x0146,
+  0x0148, 0x0149,
+  0x014b, 0x014b,
+  0x014d, 0x014d,
+  0x014f, 0x014f,
+  0x0151, 0x0151,
+  0x0153, 0x0153,
+  0x0155, 0x0155,
+  0x0157, 0x0157,
+  0x0159, 0x0159,
+  0x015b, 0x015b,
+  0x015d, 0x015d,
+  0x015f, 0x015f,
+  0x0161, 0x0161,
+  0x0163, 0x0163,
+  0x0165, 0x0165,
+  0x0167, 0x0167,
+  0x0169, 0x0169,
+  0x016b, 0x016b,
+  0x016d, 0x016d,
+  0x016f, 0x016f,
+  0x0171, 0x0171,
+  0x0173, 0x0173,
+  0x0175, 0x0175,
+  0x0177, 0x0177,
+  0x017a, 0x017a,
+  0x017c, 0x017c,
+  0x017e, 0x0180,
+  0x0183, 0x0183,
+  0x0185, 0x0185,
+  0x0188, 0x0188,
+  0x018c, 0x018d,
+  0x0192, 0x0192,
+  0x0195, 0x0195,
+  0x0199, 0x019b,
+  0x019e, 0x019e,
+  0x01a1, 0x01a1,
+  0x01a3, 0x01a3,
+  0x01a5, 0x01a5,
+  0x01a8, 0x01a8,
+  0x01aa, 0x01ab,
+  0x01ad, 0x01ad,
+  0x01b0, 0x01b0,
+  0x01b4, 0x01b4,
+  0x01b6, 0x01b6,
+  0x01b9, 0x01ba,
+  0x01bd, 0x01bf,
+  0x01c6, 0x01c6,
+  0x01c9, 0x01c9,
+  0x01cc, 0x01cc,
+  0x01ce, 0x01ce,
+  0x01d0, 0x01d0,
+  0x01d2, 0x01d2,
+  0x01d4, 0x01d4,
+  0x01d6, 0x01d6,
+  0x01d8, 0x01d8,
+  0x01da, 0x01da,
+  0x01dc, 0x01dd,
+  0x01df, 0x01df,
+  0x01e1, 0x01e1,
+  0x01e3, 0x01e3,
+  0x01e5, 0x01e5,
+  0x01e7, 0x01e7,
+  0x01e9, 0x01e9,
+  0x01eb, 0x01eb,
+  0x01ed, 0x01ed,
+  0x01ef, 0x01f0,
+  0x01f3, 0x01f3,
+  0x01f5, 0x01f5,
+  0x01f9, 0x01f9,
+  0x01fb, 0x01fb,
+  0x01fd, 0x01fd,
+  0x01ff, 0x01ff,
+  0x0201, 0x0201,
+  0x0203, 0x0203,
+  0x0205, 0x0205,
+  0x0207, 0x0207,
+  0x0209, 0x0209,
+  0x020b, 0x020b,
+  0x020d, 0x020d,
+  0x020f, 0x020f,
+  0x0211, 0x0211,
+  0x0213, 0x0213,
+  0x0215, 0x0215,
+  0x0217, 0x0217,
+  0x0219, 0x0219,
+  0x021b, 0x021b,
+  0x021d, 0x021d,
+  0x021f, 0x021f,
+  0x0221, 0x0221,
+  0x0223, 0x0223,
+  0x0225, 0x0225,
+  0x0227, 0x0227,
+  0x0229, 0x0229,
+  0x022b, 0x022b,
+  0x022d, 0x022d,
+  0x022f, 0x022f,
+  0x0231, 0x0231,
+  0x0233, 0x0239,
+  0x023c, 0x023c,
+  0x023f, 0x0240,
+  0x0250, 0x02af,
+  0x0390, 0x0390,
+  0x03ac, 0x03ce,
+  0x03d0, 0x03d1,
+  0x03d5, 0x03d7,
+  0x03d9, 0x03d9,
+  0x03db, 0x03db,
+  0x03dd, 0x03dd,
+  0x03df, 0x03df,
+  0x03e1, 0x03e1,
+  0x03e3, 0x03e3,
+  0x03e5, 0x03e5,
+  0x03e7, 0x03e7,
+  0x03e9, 0x03e9,
+  0x03eb, 0x03eb,
+  0x03ed, 0x03ed,
+  0x03ef, 0x03f3,
+  0x03f5, 0x03f5,
+  0x03f8, 0x03f8,
+  0x03fb, 0x03fc,
+  0x0430, 0x045f,
+  0x0461, 0x0461,
+  0x0463, 0x0463,
+  0x0465, 0x0465,
+  0x0467, 0x0467,
+  0x0469, 0x0469,
+  0x046b, 0x046b,
+  0x046d, 0x046d,
+  0x046f, 0x046f,
+  0x0471, 0x0471,
+  0x0473, 0x0473,
+  0x0475, 0x0475,
+  0x0477, 0x0477,
+  0x0479, 0x0479,
+  0x047b, 0x047b,
+  0x047d, 0x047d,
+  0x047f, 0x047f,
+  0x0481, 0x0481,
+  0x048b, 0x048b,
+  0x048d, 0x048d,
+  0x048f, 0x048f,
+  0x0491, 0x0491,
+  0x0493, 0x0493,
+  0x0495, 0x0495,
+  0x0497, 0x0497,
+  0x0499, 0x0499,
+  0x049b, 0x049b,
+  0x049d, 0x049d,
+  0x049f, 0x049f,
+  0x04a1, 0x04a1,
+  0x04a3, 0x04a3,
+  0x04a5, 0x04a5,
+  0x04a7, 0x04a7,
+  0x04a9, 0x04a9,
+  0x04ab, 0x04ab,
+  0x04ad, 0x04ad,
+  0x04af, 0x04af,
+  0x04b1, 0x04b1,
+  0x04b3, 0x04b3,
+  0x04b5, 0x04b5,
+  0x04b7, 0x04b7,
+  0x04b9, 0x04b9,
+  0x04bb, 0x04bb,
+  0x04bd, 0x04bd,
+  0x04bf, 0x04bf,
+  0x04c2, 0x04c2,
+  0x04c4, 0x04c4,
+  0x04c6, 0x04c6,
+  0x04c8, 0x04c8,
+  0x04ca, 0x04ca,
+  0x04cc, 0x04cc,
+  0x04ce, 0x04ce,
+  0x04d1, 0x04d1,
+  0x04d3, 0x04d3,
+  0x04d5, 0x04d5,
+  0x04d7, 0x04d7,
+  0x04d9, 0x04d9,
+  0x04db, 0x04db,
+  0x04dd, 0x04dd,
+  0x04df, 0x04df,
+  0x04e1, 0x04e1,
+  0x04e3, 0x04e3,
+  0x04e5, 0x04e5,
+  0x04e7, 0x04e7,
+  0x04e9, 0x04e9,
+  0x04eb, 0x04eb,
+  0x04ed, 0x04ed,
+  0x04ef, 0x04ef,
+  0x04f1, 0x04f1,
+  0x04f3, 0x04f3,
+  0x04f5, 0x04f5,
+  0x04f7, 0x04f7,
+  0x04f9, 0x04f9,
+  0x0501, 0x0501,
+  0x0503, 0x0503,
+  0x0505, 0x0505,
+  0x0507, 0x0507,
+  0x0509, 0x0509,
+  0x050b, 0x050b,
+  0x050d, 0x050d,
+  0x050f, 0x050f,
+  0x0561, 0x0587,
+  0x1d00, 0x1d2b,
+  0x1d62, 0x1d77,
+  0x1d79, 0x1d9a,
+  0x1e01, 0x1e01,
+  0x1e03, 0x1e03,
+  0x1e05, 0x1e05,
+  0x1e07, 0x1e07,
+  0x1e09, 0x1e09,
+  0x1e0b, 0x1e0b,
+  0x1e0d, 0x1e0d,
+  0x1e0f, 0x1e0f,
+  0x1e11, 0x1e11,
+  0x1e13, 0x1e13,
+  0x1e15, 0x1e15,
+  0x1e17, 0x1e17,
+  0x1e19, 0x1e19,
+  0x1e1b, 0x1e1b,
+  0x1e1d, 0x1e1d,
+  0x1e1f, 0x1e1f,
+  0x1e21, 0x1e21,
+  0x1e23, 0x1e23,
+  0x1e25, 0x1e25,
+  0x1e27, 0x1e27,
+  0x1e29, 0x1e29,
+  0x1e2b, 0x1e2b,
+  0x1e2d, 0x1e2d,
+  0x1e2f, 0x1e2f,
+  0x1e31, 0x1e31,
+  0x1e33, 0x1e33,
+  0x1e35, 0x1e35,
+  0x1e37, 0x1e37,
+  0x1e39, 0x1e39,
+  0x1e3b, 0x1e3b,
+  0x1e3d, 0x1e3d,
+  0x1e3f, 0x1e3f,
+  0x1e41, 0x1e41,
+  0x1e43, 0x1e43,
+  0x1e45, 0x1e45,
+  0x1e47, 0x1e47,
+  0x1e49, 0x1e49,
+  0x1e4b, 0x1e4b,
+  0x1e4d, 0x1e4d,
+  0x1e4f, 0x1e4f,
+  0x1e51, 0x1e51,
+  0x1e53, 0x1e53,
+  0x1e55, 0x1e55,
+  0x1e57, 0x1e57,
+  0x1e59, 0x1e59,
+  0x1e5b, 0x1e5b,
+  0x1e5d, 0x1e5d,
+  0x1e5f, 0x1e5f,
+  0x1e61, 0x1e61,
+  0x1e63, 0x1e63,
+  0x1e65, 0x1e65,
+  0x1e67, 0x1e67,
+  0x1e69, 0x1e69,
+  0x1e6b, 0x1e6b,
+  0x1e6d, 0x1e6d,
+  0x1e6f, 0x1e6f,
+  0x1e71, 0x1e71,
+  0x1e73, 0x1e73,
+  0x1e75, 0x1e75,
+  0x1e77, 0x1e77,
+  0x1e79, 0x1e79,
+  0x1e7b, 0x1e7b,
+  0x1e7d, 0x1e7d,
+  0x1e7f, 0x1e7f,
+  0x1e81, 0x1e81,
+  0x1e83, 0x1e83,
+  0x1e85, 0x1e85,
+  0x1e87, 0x1e87,
+  0x1e89, 0x1e89,
+  0x1e8b, 0x1e8b,
+  0x1e8d, 0x1e8d,
+  0x1e8f, 0x1e8f,
+  0x1e91, 0x1e91,
+  0x1e93, 0x1e93,
+  0x1e95, 0x1e9b,
+  0x1ea1, 0x1ea1,
+  0x1ea3, 0x1ea3,
+  0x1ea5, 0x1ea5,
+  0x1ea7, 0x1ea7,
+  0x1ea9, 0x1ea9,
+  0x1eab, 0x1eab,
+  0x1ead, 0x1ead,
+  0x1eaf, 0x1eaf,
+  0x1eb1, 0x1eb1,
+  0x1eb3, 0x1eb3,
+  0x1eb5, 0x1eb5,
+  0x1eb7, 0x1eb7,
+  0x1eb9, 0x1eb9,
+  0x1ebb, 0x1ebb,
+  0x1ebd, 0x1ebd,
+  0x1ebf, 0x1ebf,
+  0x1ec1, 0x1ec1,
+  0x1ec3, 0x1ec3,
+  0x1ec5, 0x1ec5,
+  0x1ec7, 0x1ec7,
+  0x1ec9, 0x1ec9,
+  0x1ecb, 0x1ecb,
+  0x1ecd, 0x1ecd,
+  0x1ecf, 0x1ecf,
+  0x1ed1, 0x1ed1,
+  0x1ed3, 0x1ed3,
+  0x1ed5, 0x1ed5,
+  0x1ed7, 0x1ed7,
+  0x1ed9, 0x1ed9,
+  0x1edb, 0x1edb,
+  0x1edd, 0x1edd,
+  0x1edf, 0x1edf,
+  0x1ee1, 0x1ee1,
+  0x1ee3, 0x1ee3,
+  0x1ee5, 0x1ee5,
+  0x1ee7, 0x1ee7,
+  0x1ee9, 0x1ee9,
+  0x1eeb, 0x1eeb,
+  0x1eed, 0x1eed,
+  0x1eef, 0x1eef,
+  0x1ef1, 0x1ef1,
+  0x1ef3, 0x1ef3,
+  0x1ef5, 0x1ef5,
+  0x1ef7, 0x1ef7,
+  0x1ef9, 0x1ef9,
+  0x1f00, 0x1f07,
+  0x1f10, 0x1f15,
+  0x1f20, 0x1f27,
+  0x1f30, 0x1f37,
+  0x1f40, 0x1f45,
+  0x1f50, 0x1f57,
+  0x1f60, 0x1f67,
+  0x1f70, 0x1f7d,
+  0x1f80, 0x1f87,
+  0x1f90, 0x1f97,
+  0x1fa0, 0x1fa7,
+  0x1fb0, 0x1fb4,
+  0x1fb6, 0x1fb7,
+  0x1fbe, 0x1fbe,
+  0x1fc2, 0x1fc4,
+  0x1fc6, 0x1fc7,
+  0x1fd0, 0x1fd3,
+  0x1fd6, 0x1fd7,
+  0x1fe0, 0x1fe7,
+  0x1ff2, 0x1ff4,
+  0x1ff6, 0x1ff7,
+  0x2071, 0x2071,
+  0x207f, 0x207f,
+  0x210a, 0x210a,
+  0x210e, 0x210f,
+  0x2113, 0x2113,
+  0x212f, 0x212f,
+  0x2134, 0x2134,
+  0x2139, 0x2139,
+  0x213c, 0x213d,
+  0x2146, 0x2149,
+  0x2c30, 0x2c5e,
+  0x2c81, 0x2c81,
+  0x2c83, 0x2c83,
+  0x2c85, 0x2c85,
+  0x2c87, 0x2c87,
+  0x2c89, 0x2c89,
+  0x2c8b, 0x2c8b,
+  0x2c8d, 0x2c8d,
+  0x2c8f, 0x2c8f,
+  0x2c91, 0x2c91,
+  0x2c93, 0x2c93,
+  0x2c95, 0x2c95,
+  0x2c97, 0x2c97,
+  0x2c99, 0x2c99,
+  0x2c9b, 0x2c9b,
+  0x2c9d, 0x2c9d,
+  0x2c9f, 0x2c9f,
+  0x2ca1, 0x2ca1,
+  0x2ca3, 0x2ca3,
+  0x2ca5, 0x2ca5,
+  0x2ca7, 0x2ca7,
+  0x2ca9, 0x2ca9,
+  0x2cab, 0x2cab,
+  0x2cad, 0x2cad,
+  0x2caf, 0x2caf,
+  0x2cb1, 0x2cb1,
+  0x2cb3, 0x2cb3,
+  0x2cb5, 0x2cb5,
+  0x2cb7, 0x2cb7,
+  0x2cb9, 0x2cb9,
+  0x2cbb, 0x2cbb,
+  0x2cbd, 0x2cbd,
+  0x2cbf, 0x2cbf,
+  0x2cc1, 0x2cc1,
+  0x2cc3, 0x2cc3,
+  0x2cc5, 0x2cc5,
+  0x2cc7, 0x2cc7,
+  0x2cc9, 0x2cc9,
+  0x2ccb, 0x2ccb,
+  0x2ccd, 0x2ccd,
+  0x2ccf, 0x2ccf,
+  0x2cd1, 0x2cd1,
+  0x2cd3, 0x2cd3,
+  0x2cd5, 0x2cd5,
+  0x2cd7, 0x2cd7,
+  0x2cd9, 0x2cd9,
+  0x2cdb, 0x2cdb,
+  0x2cdd, 0x2cdd,
+  0x2cdf, 0x2cdf,
+  0x2ce1, 0x2ce1,
+  0x2ce3, 0x2ce4,
+  0x2d00, 0x2d25,
+  0xfb00, 0xfb06,
+  0xfb13, 0xfb17,
+  0xff41, 0xff5a,
+  0x10428, 0x1044f,
+  0x1d41a, 0x1d433,
+  0x1d44e, 0x1d454,
+  0x1d456, 0x1d467,
+  0x1d482, 0x1d49b,
+  0x1d4b6, 0x1d4b9,
+  0x1d4bb, 0x1d4bb,
+  0x1d4bd, 0x1d4c3,
+  0x1d4c5, 0x1d4cf,
+  0x1d4ea, 0x1d503,
+  0x1d51e, 0x1d537,
+  0x1d552, 0x1d56b,
+  0x1d586, 0x1d59f,
+  0x1d5ba, 0x1d5d3,
+  0x1d5ee, 0x1d607,
+  0x1d622, 0x1d63b,
+  0x1d656, 0x1d66f,
+  0x1d68a, 0x1d6a5,
+  0x1d6c2, 0x1d6da,
+  0x1d6dc, 0x1d6e1,
+  0x1d6fc, 0x1d714,
+  0x1d716, 0x1d71b,
+  0x1d736, 0x1d74e,
+  0x1d750, 0x1d755,
+  0x1d770, 0x1d788,
+  0x1d78a, 0x1d78f,
+  0x1d7aa, 0x1d7c2,
+  0x1d7c4, 0x1d7c9
+}; /* CR_Ll */
+
+/* 'Lm': General Category */
+static const OnigCodePoint CR_Lm[] = {
+  26,
+  0x02b0, 0x02c1,
+  0x02c6, 0x02d1,
+  0x02e0, 0x02e4,
+  0x02ee, 0x02ee,
+  0x037a, 0x037a,
+  0x0559, 0x0559,
+  0x0640, 0x0640,
+  0x06e5, 0x06e6,
+  0x0e46, 0x0e46,
+  0x0ec6, 0x0ec6,
+  0x10fc, 0x10fc,
+  0x17d7, 0x17d7,
+  0x1843, 0x1843,
+  0x1d2c, 0x1d61,
+  0x1d78, 0x1d78,
+  0x1d9b, 0x1dbf,
+  0x2090, 0x2094,
+  0x2d6f, 0x2d6f,
+  0x3005, 0x3005,
+  0x3031, 0x3035,
+  0x303b, 0x303b,
+  0x309d, 0x309e,
+  0x30fc, 0x30fe,
+  0xa015, 0xa015,
+  0xff70, 0xff70,
+  0xff9e, 0xff9f
+}; /* CR_Lm */
+
+/* 'Lo': General Category */
+static const OnigCodePoint CR_Lo[] = {
+  245,
+  0x01bb, 0x01bb,
+  0x01c0, 0x01c3,
+  0x05d0, 0x05ea,
+  0x05f0, 0x05f2,
+  0x0621, 0x063a,
+  0x0641, 0x064a,
+  0x066e, 0x066f,
+  0x0671, 0x06d3,
+  0x06d5, 0x06d5,
+  0x06ee, 0x06ef,
+  0x06fa, 0x06fc,
+  0x06ff, 0x06ff,
+  0x0710, 0x0710,
+  0x0712, 0x072f,
+  0x074d, 0x076d,
+  0x0780, 0x07a5,
+  0x07b1, 0x07b1,
+  0x0904, 0x0939,
+  0x093d, 0x093d,
+  0x0950, 0x0950,
+  0x0958, 0x0961,
+  0x097d, 0x097d,
+  0x0985, 0x098c,
+  0x098f, 0x0990,
+  0x0993, 0x09a8,
+  0x09aa, 0x09b0,
+  0x09b2, 0x09b2,
+  0x09b6, 0x09b9,
+  0x09bd, 0x09bd,
+  0x09ce, 0x09ce,
+  0x09dc, 0x09dd,
+  0x09df, 0x09e1,
+  0x09f0, 0x09f1,
+  0x0a05, 0x0a0a,
+  0x0a0f, 0x0a10,
+  0x0a13, 0x0a28,
+  0x0a2a, 0x0a30,
+  0x0a32, 0x0a33,
+  0x0a35, 0x0a36,
+  0x0a38, 0x0a39,
+  0x0a59, 0x0a5c,
+  0x0a5e, 0x0a5e,
+  0x0a72, 0x0a74,
+  0x0a85, 0x0a8d,
+  0x0a8f, 0x0a91,
+  0x0a93, 0x0aa8,
+  0x0aaa, 0x0ab0,
+  0x0ab2, 0x0ab3,
+  0x0ab5, 0x0ab9,
+  0x0abd, 0x0abd,
+  0x0ad0, 0x0ad0,
+  0x0ae0, 0x0ae1,
+  0x0b05, 0x0b0c,
+  0x0b0f, 0x0b10,
+  0x0b13, 0x0b28,
+  0x0b2a, 0x0b30,
+  0x0b32, 0x0b33,
+  0x0b35, 0x0b39,
+  0x0b3d, 0x0b3d,
+  0x0b5c, 0x0b5d,
+  0x0b5f, 0x0b61,
+  0x0b71, 0x0b71,
+  0x0b83, 0x0b83,
+  0x0b85, 0x0b8a,
+  0x0b8e, 0x0b90,
+  0x0b92, 0x0b95,
+  0x0b99, 0x0b9a,
+  0x0b9c, 0x0b9c,
+  0x0b9e, 0x0b9f,
+  0x0ba3, 0x0ba4,
+  0x0ba8, 0x0baa,
+  0x0bae, 0x0bb9,
+  0x0c05, 0x0c0c,
+  0x0c0e, 0x0c10,
+  0x0c12, 0x0c28,
+  0x0c2a, 0x0c33,
+  0x0c35, 0x0c39,
+  0x0c60, 0x0c61,
+  0x0c85, 0x0c8c,
+  0x0c8e, 0x0c90,
+  0x0c92, 0x0ca8,
+  0x0caa, 0x0cb3,
+  0x0cb5, 0x0cb9,
+  0x0cbd, 0x0cbd,
+  0x0cde, 0x0cde,
+  0x0ce0, 0x0ce1,
+  0x0d05, 0x0d0c,
+  0x0d0e, 0x0d10,
+  0x0d12, 0x0d28,
+  0x0d2a, 0x0d39,
+  0x0d60, 0x0d61,
+  0x0d85, 0x0d96,
+  0x0d9a, 0x0db1,
+  0x0db3, 0x0dbb,
+  0x0dbd, 0x0dbd,
+  0x0dc0, 0x0dc6,
+  0x0e01, 0x0e30,
+  0x0e32, 0x0e33,
+  0x0e40, 0x0e45,
+  0x0e81, 0x0e82,
+  0x0e84, 0x0e84,
+  0x0e87, 0x0e88,
+  0x0e8a, 0x0e8a,
+  0x0e8d, 0x0e8d,
+  0x0e94, 0x0e97,
+  0x0e99, 0x0e9f,
+  0x0ea1, 0x0ea3,
+  0x0ea5, 0x0ea5,
+  0x0ea7, 0x0ea7,
+  0x0eaa, 0x0eab,
+  0x0ead, 0x0eb0,
+  0x0eb2, 0x0eb3,
+  0x0ebd, 0x0ebd,
+  0x0ec0, 0x0ec4,
+  0x0edc, 0x0edd,
+  0x0f00, 0x0f00,
+  0x0f40, 0x0f47,
+  0x0f49, 0x0f6a,
+  0x0f88, 0x0f8b,
+  0x1000, 0x1021,
+  0x1023, 0x1027,
+  0x1029, 0x102a,
+  0x1050, 0x1055,
+  0x10d0, 0x10fa,
+  0x1100, 0x1159,
+  0x115f, 0x11a2,
+  0x11a8, 0x11f9,
+  0x1200, 0x1248,
+  0x124a, 0x124d,
+  0x1250, 0x1256,
+  0x1258, 0x1258,
+  0x125a, 0x125d,
+  0x1260, 0x1288,
+  0x128a, 0x128d,
+  0x1290, 0x12b0,
+  0x12b2, 0x12b5,
+  0x12b8, 0x12be,
+  0x12c0, 0x12c0,
+  0x12c2, 0x12c5,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
+  0x1312, 0x1315,
+  0x1318, 0x135a,
+  0x1380, 0x138f,
+  0x13a0, 0x13f4,
+  0x1401, 0x166c,
+  0x166f, 0x1676,
+  0x1681, 0x169a,
+  0x16a0, 0x16ea,
+  0x1700, 0x170c,
+  0x170e, 0x1711,
+  0x1720, 0x1731,
+  0x1740, 0x1751,
+  0x1760, 0x176c,
+  0x176e, 0x1770,
+  0x1780, 0x17b3,
+  0x17dc, 0x17dc,
+  0x1820, 0x1842,
+  0x1844, 0x1877,
+  0x1880, 0x18a8,
+  0x1900, 0x191c,
+  0x1950, 0x196d,
+  0x1970, 0x1974,
+  0x1980, 0x19a9,
+  0x19c1, 0x19c7,
+  0x1a00, 0x1a16,
+  0x2135, 0x2138,
+  0x2d30, 0x2d65,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde,
+  0x3006, 0x3006,
+  0x303c, 0x303c,
+  0x3041, 0x3096,
+  0x309f, 0x309f,
+  0x30a1, 0x30fa,
+  0x30ff, 0x30ff,
+  0x3105, 0x312c,
+  0x3131, 0x318e,
+  0x31a0, 0x31b7,
+  0x31f0, 0x31ff,
+  0x3400, 0x4db5,
+  0x4e00, 0x9fbb,
+  0xa000, 0xa014,
+  0xa016, 0xa48c,
+  0xa800, 0xa801,
+  0xa803, 0xa805,
+  0xa807, 0xa80a,
+  0xa80c, 0xa822,
+  0xac00, 0xd7a3,
+  0xf900, 0xfa2d,
+  0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
+  0xfb1d, 0xfb1d,
+  0xfb1f, 0xfb28,
+  0xfb2a, 0xfb36,
+  0xfb38, 0xfb3c,
+  0xfb3e, 0xfb3e,
+  0xfb40, 0xfb41,
+  0xfb43, 0xfb44,
+  0xfb46, 0xfbb1,
+  0xfbd3, 0xfd3d,
+  0xfd50, 0xfd8f,
+  0xfd92, 0xfdc7,
+  0xfdf0, 0xfdfb,
+  0xfe70, 0xfe74,
+  0xfe76, 0xfefc,
+  0xff66, 0xff6f,
+  0xff71, 0xff9d,
+  0xffa0, 0xffbe,
+  0xffc2, 0xffc7,
+  0xffca, 0xffcf,
+  0xffd2, 0xffd7,
+  0xffda, 0xffdc,
+  0x10000, 0x1000b,
+  0x1000d, 0x10026,
+  0x10028, 0x1003a,
+  0x1003c, 0x1003d,
+  0x1003f, 0x1004d,
+  0x10050, 0x1005d,
+  0x10080, 0x100fa,
+  0x10300, 0x1031e,
+  0x10330, 0x10349,
+  0x10380, 0x1039d,
+  0x103a0, 0x103c3,
+  0x103c8, 0x103cf,
+  0x10450, 0x1049d,
+  0x10800, 0x10805,
+  0x10808, 0x10808,
+  0x1080a, 0x10835,
+  0x10837, 0x10838,
+  0x1083c, 0x1083c,
+  0x1083f, 0x1083f,
+  0x10a00, 0x10a00,
+  0x10a10, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x20000, 0x2a6d6,
+  0x2f800, 0x2fa1d
+}; /* CR_Lo */
+
+/* 'Lt': General Category */
+static const OnigCodePoint CR_Lt[] = {
+  10,
+  0x01c5, 0x01c5,
+  0x01c8, 0x01c8,
+  0x01cb, 0x01cb,
+  0x01f2, 0x01f2,
+  0x1f88, 0x1f8f,
+  0x1f98, 0x1f9f,
+  0x1fa8, 0x1faf,
+  0x1fbc, 0x1fbc,
+  0x1fcc, 0x1fcc,
+  0x1ffc, 0x1ffc
+}; /* CR_Lt */
+
+/* 'Lu': General Category */
+static const OnigCodePoint CR_Lu[] = {
+  476,
+  0x0041, 0x005a,
+  0x00c0, 0x00d6,
+  0x00d8, 0x00de,
+  0x0100, 0x0100,
+  0x0102, 0x0102,
+  0x0104, 0x0104,
+  0x0106, 0x0106,
+  0x0108, 0x0108,
+  0x010a, 0x010a,
+  0x010c, 0x010c,
+  0x010e, 0x010e,
+  0x0110, 0x0110,
+  0x0112, 0x0112,
+  0x0114, 0x0114,
+  0x0116, 0x0116,
+  0x0118, 0x0118,
+  0x011a, 0x011a,
+  0x011c, 0x011c,
+  0x011e, 0x011e,
+  0x0120, 0x0120,
+  0x0122, 0x0122,
+  0x0124, 0x0124,
+  0x0126, 0x0126,
+  0x0128, 0x0128,
+  0x012a, 0x012a,
+  0x012c, 0x012c,
+  0x012e, 0x012e,
+  0x0130, 0x0130,
+  0x0132, 0x0132,
+  0x0134, 0x0134,
+  0x0136, 0x0136,
+  0x0139, 0x0139,
+  0x013b, 0x013b,
+  0x013d, 0x013d,
+  0x013f, 0x013f,
+  0x0141, 0x0141,
+  0x0143, 0x0143,
+  0x0145, 0x0145,
+  0x0147, 0x0147,
+  0x014a, 0x014a,
+  0x014c, 0x014c,
+  0x014e, 0x014e,
+  0x0150, 0x0150,
+  0x0152, 0x0152,
+  0x0154, 0x0154,
+  0x0156, 0x0156,
+  0x0158, 0x0158,
+  0x015a, 0x015a,
+  0x015c, 0x015c,
+  0x015e, 0x015e,
+  0x0160, 0x0160,
+  0x0162, 0x0162,
+  0x0164, 0x0164,
+  0x0166, 0x0166,
+  0x0168, 0x0168,
+  0x016a, 0x016a,
+  0x016c, 0x016c,
+  0x016e, 0x016e,
+  0x0170, 0x0170,
+  0x0172, 0x0172,
+  0x0174, 0x0174,
+  0x0176, 0x0176,
+  0x0178, 0x0179,
+  0x017b, 0x017b,
+  0x017d, 0x017d,
+  0x0181, 0x0182,
+  0x0184, 0x0184,
+  0x0186, 0x0187,
+  0x0189, 0x018b,
+  0x018e, 0x0191,
+  0x0193, 0x0194,
+  0x0196, 0x0198,
+  0x019c, 0x019d,
+  0x019f, 0x01a0,
+  0x01a2, 0x01a2,
+  0x01a4, 0x01a4,
+  0x01a6, 0x01a7,
+  0x01a9, 0x01a9,
+  0x01ac, 0x01ac,
+  0x01ae, 0x01af,
+  0x01b1, 0x01b3,
+  0x01b5, 0x01b5,
+  0x01b7, 0x01b8,
+  0x01bc, 0x01bc,
+  0x01c4, 0x01c4,
+  0x01c7, 0x01c7,
+  0x01ca, 0x01ca,
+  0x01cd, 0x01cd,
+  0x01cf, 0x01cf,
+  0x01d1, 0x01d1,
+  0x01d3, 0x01d3,
+  0x01d5, 0x01d5,
+  0x01d7, 0x01d7,
+  0x01d9, 0x01d9,
+  0x01db, 0x01db,
+  0x01de, 0x01de,
+  0x01e0, 0x01e0,
+  0x01e2, 0x01e2,
+  0x01e4, 0x01e4,
+  0x01e6, 0x01e6,
+  0x01e8, 0x01e8,
+  0x01ea, 0x01ea,
+  0x01ec, 0x01ec,
+  0x01ee, 0x01ee,
+  0x01f1, 0x01f1,
+  0x01f4, 0x01f4,
+  0x01f6, 0x01f8,
+  0x01fa, 0x01fa,
+  0x01fc, 0x01fc,
+  0x01fe, 0x01fe,
+  0x0200, 0x0200,
+  0x0202, 0x0202,
+  0x0204, 0x0204,
+  0x0206, 0x0206,
+  0x0208, 0x0208,
+  0x020a, 0x020a,
+  0x020c, 0x020c,
+  0x020e, 0x020e,
+  0x0210, 0x0210,
+  0x0212, 0x0212,
+  0x0214, 0x0214,
+  0x0216, 0x0216,
+  0x0218, 0x0218,
+  0x021a, 0x021a,
+  0x021c, 0x021c,
+  0x021e, 0x021e,
+  0x0220, 0x0220,
+  0x0222, 0x0222,
+  0x0224, 0x0224,
+  0x0226, 0x0226,
+  0x0228, 0x0228,
+  0x022a, 0x022a,
+  0x022c, 0x022c,
+  0x022e, 0x022e,
+  0x0230, 0x0230,
+  0x0232, 0x0232,
+  0x023a, 0x023b,
+  0x023d, 0x023e,
+  0x0241, 0x0241,
+  0x0386, 0x0386,
+  0x0388, 0x038a,
+  0x038c, 0x038c,
+  0x038e, 0x038f,
+  0x0391, 0x03a1,
+  0x03a3, 0x03ab,
+  0x03d2, 0x03d4,
+  0x03d8, 0x03d8,
+  0x03da, 0x03da,
+  0x03dc, 0x03dc,
+  0x03de, 0x03de,
+  0x03e0, 0x03e0,
+  0x03e2, 0x03e2,
+  0x03e4, 0x03e4,
+  0x03e6, 0x03e6,
+  0x03e8, 0x03e8,
+  0x03ea, 0x03ea,
+  0x03ec, 0x03ec,
+  0x03ee, 0x03ee,
+  0x03f4, 0x03f4,
+  0x03f7, 0x03f7,
+  0x03f9, 0x03fa,
+  0x03fd, 0x042f,
+  0x0460, 0x0460,
+  0x0462, 0x0462,
+  0x0464, 0x0464,
+  0x0466, 0x0466,
+  0x0468, 0x0468,
+  0x046a, 0x046a,
+  0x046c, 0x046c,
+  0x046e, 0x046e,
+  0x0470, 0x0470,
+  0x0472, 0x0472,
+  0x0474, 0x0474,
+  0x0476, 0x0476,
+  0x0478, 0x0478,
+  0x047a, 0x047a,
+  0x047c, 0x047c,
+  0x047e, 0x047e,
+  0x0480, 0x0480,
+  0x048a, 0x048a,
+  0x048c, 0x048c,
+  0x048e, 0x048e,
+  0x0490, 0x0490,
+  0x0492, 0x0492,
+  0x0494, 0x0494,
+  0x0496, 0x0496,
+  0x0498, 0x0498,
+  0x049a, 0x049a,
+  0x049c, 0x049c,
+  0x049e, 0x049e,
+  0x04a0, 0x04a0,
+  0x04a2, 0x04a2,
+  0x04a4, 0x04a4,
+  0x04a6, 0x04a6,
+  0x04a8, 0x04a8,
+  0x04aa, 0x04aa,
+  0x04ac, 0x04ac,
+  0x04ae, 0x04ae,
+  0x04b0, 0x04b0,
+  0x04b2, 0x04b2,
+  0x04b4, 0x04b4,
+  0x04b6, 0x04b6,
+  0x04b8, 0x04b8,
+  0x04ba, 0x04ba,
+  0x04bc, 0x04bc,
+  0x04be, 0x04be,
+  0x04c0, 0x04c1,
+  0x04c3, 0x04c3,
+  0x04c5, 0x04c5,
+  0x04c7, 0x04c7,
+  0x04c9, 0x04c9,
+  0x04cb, 0x04cb,
+  0x04cd, 0x04cd,
+  0x04d0, 0x04d0,
+  0x04d2, 0x04d2,
+  0x04d4, 0x04d4,
+  0x04d6, 0x04d6,
+  0x04d8, 0x04d8,
+  0x04da, 0x04da,
+  0x04dc, 0x04dc,
+  0x04de, 0x04de,
+  0x04e0, 0x04e0,
+  0x04e2, 0x04e2,
+  0x04e4, 0x04e4,
+  0x04e6, 0x04e6,
+  0x04e8, 0x04e8,
+  0x04ea, 0x04ea,
+  0x04ec, 0x04ec,
+  0x04ee, 0x04ee,
+  0x04f0, 0x04f0,
+  0x04f2, 0x04f2,
+  0x04f4, 0x04f4,
+  0x04f6, 0x04f6,
+  0x04f8, 0x04f8,
+  0x0500, 0x0500,
+  0x0502, 0x0502,
+  0x0504, 0x0504,
+  0x0506, 0x0506,
+  0x0508, 0x0508,
+  0x050a, 0x050a,
+  0x050c, 0x050c,
+  0x050e, 0x050e,
+  0x0531, 0x0556,
+  0x10a0, 0x10c5,
+  0x1e00, 0x1e00,
+  0x1e02, 0x1e02,
+  0x1e04, 0x1e04,
+  0x1e06, 0x1e06,
+  0x1e08, 0x1e08,
+  0x1e0a, 0x1e0a,
+  0x1e0c, 0x1e0c,
+  0x1e0e, 0x1e0e,
+  0x1e10, 0x1e10,
+  0x1e12, 0x1e12,
+  0x1e14, 0x1e14,
+  0x1e16, 0x1e16,
+  0x1e18, 0x1e18,
+  0x1e1a, 0x1e1a,
+  0x1e1c, 0x1e1c,
+  0x1e1e, 0x1e1e,
+  0x1e20, 0x1e20,
+  0x1e22, 0x1e22,
+  0x1e24, 0x1e24,
+  0x1e26, 0x1e26,
+  0x1e28, 0x1e28,
+  0x1e2a, 0x1e2a,
+  0x1e2c, 0x1e2c,
+  0x1e2e, 0x1e2e,
+  0x1e30, 0x1e30,
+  0x1e32, 0x1e32,
+  0x1e34, 0x1e34,
+  0x1e36, 0x1e36,
+  0x1e38, 0x1e38,
+  0x1e3a, 0x1e3a,
+  0x1e3c, 0x1e3c,
+  0x1e3e, 0x1e3e,
+  0x1e40, 0x1e40,
+  0x1e42, 0x1e42,
+  0x1e44, 0x1e44,
+  0x1e46, 0x1e46,
+  0x1e48, 0x1e48,
+  0x1e4a, 0x1e4a,
+  0x1e4c, 0x1e4c,
+  0x1e4e, 0x1e4e,
+  0x1e50, 0x1e50,
+  0x1e52, 0x1e52,
+  0x1e54, 0x1e54,
+  0x1e56, 0x1e56,
+  0x1e58, 0x1e58,
+  0x1e5a, 0x1e5a,
+  0x1e5c, 0x1e5c,
+  0x1e5e, 0x1e5e,
+  0x1e60, 0x1e60,
+  0x1e62, 0x1e62,
+  0x1e64, 0x1e64,
+  0x1e66, 0x1e66,
+  0x1e68, 0x1e68,
+  0x1e6a, 0x1e6a,
+  0x1e6c, 0x1e6c,
+  0x1e6e, 0x1e6e,
+  0x1e70, 0x1e70,
+  0x1e72, 0x1e72,
+  0x1e74, 0x1e74,
+  0x1e76, 0x1e76,
+  0x1e78, 0x1e78,
+  0x1e7a, 0x1e7a,
+  0x1e7c, 0x1e7c,
+  0x1e7e, 0x1e7e,
+  0x1e80, 0x1e80,
+  0x1e82, 0x1e82,
+  0x1e84, 0x1e84,
+  0x1e86, 0x1e86,
+  0x1e88, 0x1e88,
+  0x1e8a, 0x1e8a,
+  0x1e8c, 0x1e8c,
+  0x1e8e, 0x1e8e,
+  0x1e90, 0x1e90,
+  0x1e92, 0x1e92,
+  0x1e94, 0x1e94,
+  0x1ea0, 0x1ea0,
+  0x1ea2, 0x1ea2,
+  0x1ea4, 0x1ea4,
+  0x1ea6, 0x1ea6,
+  0x1ea8, 0x1ea8,
+  0x1eaa, 0x1eaa,
+  0x1eac, 0x1eac,
+  0x1eae, 0x1eae,
+  0x1eb0, 0x1eb0,
+  0x1eb2, 0x1eb2,
+  0x1eb4, 0x1eb4,
+  0x1eb6, 0x1eb6,
+  0x1eb8, 0x1eb8,
+  0x1eba, 0x1eba,
+  0x1ebc, 0x1ebc,
+  0x1ebe, 0x1ebe,
+  0x1ec0, 0x1ec0,
+  0x1ec2, 0x1ec2,
+  0x1ec4, 0x1ec4,
+  0x1ec6, 0x1ec6,
+  0x1ec8, 0x1ec8,
+  0x1eca, 0x1eca,
+  0x1ecc, 0x1ecc,
+  0x1ece, 0x1ece,
+  0x1ed0, 0x1ed0,
+  0x1ed2, 0x1ed2,
+  0x1ed4, 0x1ed4,
+  0x1ed6, 0x1ed6,
+  0x1ed8, 0x1ed8,
+  0x1eda, 0x1eda,
+  0x1edc, 0x1edc,
+  0x1ede, 0x1ede,
+  0x1ee0, 0x1ee0,
+  0x1ee2, 0x1ee2,
+  0x1ee4, 0x1ee4,
+  0x1ee6, 0x1ee6,
+  0x1ee8, 0x1ee8,
+  0x1eea, 0x1eea,
+  0x1eec, 0x1eec,
+  0x1eee, 0x1eee,
+  0x1ef0, 0x1ef0,
+  0x1ef2, 0x1ef2,
+  0x1ef4, 0x1ef4,
+  0x1ef6, 0x1ef6,
+  0x1ef8, 0x1ef8,
+  0x1f08, 0x1f0f,
+  0x1f18, 0x1f1d,
+  0x1f28, 0x1f2f,
+  0x1f38, 0x1f3f,
+  0x1f48, 0x1f4d,
+  0x1f59, 0x1f59,
+  0x1f5b, 0x1f5b,
+  0x1f5d, 0x1f5d,
+  0x1f5f, 0x1f5f,
+  0x1f68, 0x1f6f,
+  0x1fb8, 0x1fbb,
+  0x1fc8, 0x1fcb,
+  0x1fd8, 0x1fdb,
+  0x1fe8, 0x1fec,
+  0x1ff8, 0x1ffb,
+  0x2102, 0x2102,
+  0x2107, 0x2107,
+  0x210b, 0x210d,
+  0x2110, 0x2112,
+  0x2115, 0x2115,
+  0x2119, 0x211d,
+  0x2124, 0x2124,
+  0x2126, 0x2126,
+  0x2128, 0x2128,
+  0x212a, 0x212d,
+  0x2130, 0x2131,
+  0x2133, 0x2133,
+  0x213e, 0x213f,
+  0x2145, 0x2145,
+  0x2c00, 0x2c2e,
+  0x2c80, 0x2c80,
+  0x2c82, 0x2c82,
+  0x2c84, 0x2c84,
+  0x2c86, 0x2c86,
+  0x2c88, 0x2c88,
+  0x2c8a, 0x2c8a,
+  0x2c8c, 0x2c8c,
+  0x2c8e, 0x2c8e,
+  0x2c90, 0x2c90,
+  0x2c92, 0x2c92,
+  0x2c94, 0x2c94,
+  0x2c96, 0x2c96,
+  0x2c98, 0x2c98,
+  0x2c9a, 0x2c9a,
+  0x2c9c, 0x2c9c,
+  0x2c9e, 0x2c9e,
+  0x2ca0, 0x2ca0,
+  0x2ca2, 0x2ca2,
+  0x2ca4, 0x2ca4,
+  0x2ca6, 0x2ca6,
+  0x2ca8, 0x2ca8,
+  0x2caa, 0x2caa,
+  0x2cac, 0x2cac,
+  0x2cae, 0x2cae,
+  0x2cb0, 0x2cb0,
+  0x2cb2, 0x2cb2,
+  0x2cb4, 0x2cb4,
+  0x2cb6, 0x2cb6,
+  0x2cb8, 0x2cb8,
+  0x2cba, 0x2cba,
+  0x2cbc, 0x2cbc,
+  0x2cbe, 0x2cbe,
+  0x2cc0, 0x2cc0,
+  0x2cc2, 0x2cc2,
+  0x2cc4, 0x2cc4,
+  0x2cc6, 0x2cc6,
+  0x2cc8, 0x2cc8,
+  0x2cca, 0x2cca,
+  0x2ccc, 0x2ccc,
+  0x2cce, 0x2cce,
+  0x2cd0, 0x2cd0,
+  0x2cd2, 0x2cd2,
+  0x2cd4, 0x2cd4,
+  0x2cd6, 0x2cd6,
+  0x2cd8, 0x2cd8,
+  0x2cda, 0x2cda,
+  0x2cdc, 0x2cdc,
+  0x2cde, 0x2cde,
+  0x2ce0, 0x2ce0,
+  0x2ce2, 0x2ce2,
+  0xff21, 0xff3a,
+  0x10400, 0x10427,
+  0x1d400, 0x1d419,
+  0x1d434, 0x1d44d,
+  0x1d468, 0x1d481,
+  0x1d49c, 0x1d49c,
+  0x1d49e, 0x1d49f,
+  0x1d4a2, 0x1d4a2,
+  0x1d4a5, 0x1d4a6,
+  0x1d4a9, 0x1d4ac,
+  0x1d4ae, 0x1d4b5,
+  0x1d4d0, 0x1d4e9,
+  0x1d504, 0x1d505,
+  0x1d507, 0x1d50a,
+  0x1d50d, 0x1d514,
+  0x1d516, 0x1d51c,
+  0x1d538, 0x1d539,
+  0x1d53b, 0x1d53e,
+  0x1d540, 0x1d544,
+  0x1d546, 0x1d546,
+  0x1d54a, 0x1d550,
+  0x1d56c, 0x1d585,
+  0x1d5a0, 0x1d5b9,
+  0x1d5d4, 0x1d5ed,
+  0x1d608, 0x1d621,
+  0x1d63c, 0x1d655,
+  0x1d670, 0x1d689,
+  0x1d6a8, 0x1d6c0,
+  0x1d6e2, 0x1d6fa,
+  0x1d71c, 0x1d734,
+  0x1d756, 0x1d76e,
+  0x1d790, 0x1d7a8
+}; /* CR_Lu */
+
+/* 'M': Major Category */
+static const OnigCodePoint CR_M[] = {
+  133,
+  0x0300, 0x036f,
+  0x0483, 0x0486,
+  0x0488, 0x0489,
+  0x0591, 0x05b9,
+  0x05bb, 0x05bd,
+  0x05bf, 0x05bf,
+  0x05c1, 0x05c2,
+  0x05c4, 0x05c5,
+  0x05c7, 0x05c7,
+  0x0610, 0x0615,
+  0x064b, 0x065e,
+  0x0670, 0x0670,
+  0x06d6, 0x06dc,
+  0x06de, 0x06e4,
+  0x06e7, 0x06e8,
+  0x06ea, 0x06ed,
+  0x0711, 0x0711,
+  0x0730, 0x074a,
+  0x07a6, 0x07b0,
+  0x0901, 0x0903,
+  0x093c, 0x093c,
+  0x093e, 0x094d,
+  0x0951, 0x0954,
+  0x0962, 0x0963,
+  0x0981, 0x0983,
+  0x09bc, 0x09bc,
+  0x09be, 0x09c4,
+  0x09c7, 0x09c8,
+  0x09cb, 0x09cd,
+  0x09d7, 0x09d7,
+  0x09e2, 0x09e3,
+  0x0a01, 0x0a03,
+  0x0a3c, 0x0a3c,
+  0x0a3e, 0x0a42,
+  0x0a47, 0x0a48,
+  0x0a4b, 0x0a4d,
+  0x0a70, 0x0a71,
+  0x0a81, 0x0a83,
+  0x0abc, 0x0abc,
+  0x0abe, 0x0ac5,
+  0x0ac7, 0x0ac9,
+  0x0acb, 0x0acd,
+  0x0ae2, 0x0ae3,
+  0x0b01, 0x0b03,
+  0x0b3c, 0x0b3c,
+  0x0b3e, 0x0b43,
+  0x0b47, 0x0b48,
+  0x0b4b, 0x0b4d,
+  0x0b56, 0x0b57,
+  0x0b82, 0x0b82,
+  0x0bbe, 0x0bc2,
+  0x0bc6, 0x0bc8,
+  0x0bca, 0x0bcd,
+  0x0bd7, 0x0bd7,
+  0x0c01, 0x0c03,
+  0x0c3e, 0x0c44,
+  0x0c46, 0x0c48,
+  0x0c4a, 0x0c4d,
+  0x0c55, 0x0c56,
+  0x0c82, 0x0c83,
+  0x0cbc, 0x0cbc,
+  0x0cbe, 0x0cc4,
+  0x0cc6, 0x0cc8,
+  0x0cca, 0x0ccd,
+  0x0cd5, 0x0cd6,
+  0x0d02, 0x0d03,
+  0x0d3e, 0x0d43,
+  0x0d46, 0x0d48,
+  0x0d4a, 0x0d4d,
+  0x0d57, 0x0d57,
+  0x0d82, 0x0d83,
+  0x0dca, 0x0dca,
+  0x0dcf, 0x0dd4,
+  0x0dd6, 0x0dd6,
+  0x0dd8, 0x0ddf,
+  0x0df2, 0x0df3,
+  0x0e31, 0x0e31,
+  0x0e34, 0x0e3a,
+  0x0e47, 0x0e4e,
+  0x0eb1, 0x0eb1,
+  0x0eb4, 0x0eb9,
+  0x0ebb, 0x0ebc,
+  0x0ec8, 0x0ecd,
+  0x0f18, 0x0f19,
+  0x0f35, 0x0f35,
+  0x0f37, 0x0f37,
+  0x0f39, 0x0f39,
+  0x0f3e, 0x0f3f,
+  0x0f71, 0x0f84,
+  0x0f86, 0x0f87,
+  0x0f90, 0x0f97,
+  0x0f99, 0x0fbc,
+  0x0fc6, 0x0fc6,
+  0x102c, 0x1032,
+  0x1036, 0x1039,
+  0x1056, 0x1059,
+  0x135f, 0x135f,
+  0x1712, 0x1714,
+  0x1732, 0x1734,
+  0x1752, 0x1753,
+  0x1772, 0x1773,
+  0x17b6, 0x17d3,
+  0x17dd, 0x17dd,
+  0x180b, 0x180d,
+  0x18a9, 0x18a9,
+  0x1920, 0x192b,
+  0x1930, 0x193b,
+  0x19b0, 0x19c0,
+  0x19c8, 0x19c9,
+  0x1a17, 0x1a1b,
+  0x1dc0, 0x1dc3,
+  0x20d0, 0x20eb,
+  0x302a, 0x302f,
+  0x3099, 0x309a,
+  0xa802, 0xa802,
+  0xa806, 0xa806,
+  0xa80b, 0xa80b,
+  0xa823, 0xa827,
+  0xfb1e, 0xfb1e,
+  0xfe00, 0xfe0f,
+  0xfe20, 0xfe23,
+  0x10a01, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a0f,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a3f,
+  0x1d165, 0x1d169,
+  0x1d16d, 0x1d172,
+  0x1d17b, 0x1d182,
+  0x1d185, 0x1d18b,
+  0x1d1aa, 0x1d1ad,
+  0x1d242, 0x1d244,
+  0xe0100, 0xe01ef
+}; /* CR_M */
+
+/* 'Mc': General Category */
+static const OnigCodePoint CR_Mc[] = {
+  63,
+  0x0903, 0x0903,
+  0x093e, 0x0940,
+  0x0949, 0x094c,
+  0x0982, 0x0983,
+  0x09be, 0x09c0,
+  0x09c7, 0x09c8,
+  0x09cb, 0x09cc,
+  0x09d7, 0x09d7,
+  0x0a03, 0x0a03,
+  0x0a3e, 0x0a40,
+  0x0a83, 0x0a83,
+  0x0abe, 0x0ac0,
+  0x0ac9, 0x0ac9,
+  0x0acb, 0x0acc,
+  0x0b02, 0x0b03,
+  0x0b3e, 0x0b3e,
+  0x0b40, 0x0b40,
+  0x0b47, 0x0b48,
+  0x0b4b, 0x0b4c,
+  0x0b57, 0x0b57,
+  0x0bbe, 0x0bbf,
+  0x0bc1, 0x0bc2,
+  0x0bc6, 0x0bc8,
+  0x0bca, 0x0bcc,
+  0x0bd7, 0x0bd7,
+  0x0c01, 0x0c03,
+  0x0c41, 0x0c44,
+  0x0c82, 0x0c83,
+  0x0cbe, 0x0cbe,
+  0x0cc0, 0x0cc4,
+  0x0cc7, 0x0cc8,
+  0x0cca, 0x0ccb,
+  0x0cd5, 0x0cd6,
+  0x0d02, 0x0d03,
+  0x0d3e, 0x0d40,
+  0x0d46, 0x0d48,
+  0x0d4a, 0x0d4c,
+  0x0d57, 0x0d57,
+  0x0d82, 0x0d83,
+  0x0dcf, 0x0dd1,
+  0x0dd8, 0x0ddf,
+  0x0df2, 0x0df3,
+  0x0f3e, 0x0f3f,
+  0x0f7f, 0x0f7f,
+  0x102c, 0x102c,
+  0x1031, 0x1031,
+  0x1038, 0x1038,
+  0x1056, 0x1057,
+  0x17b6, 0x17b6,
+  0x17be, 0x17c5,
+  0x17c7, 0x17c8,
+  0x1923, 0x1926,
+  0x1929, 0x192b,
+  0x1930, 0x1931,
+  0x1933, 0x1938,
+  0x19b0, 0x19c0,
+  0x19c8, 0x19c9,
+  0x1a19, 0x1a1b,
+  0xa802, 0xa802,
+  0xa823, 0xa824,
+  0xa827, 0xa827,
+  0x1d165, 0x1d166,
+  0x1d16d, 0x1d172
+}; /* CR_Mc */
+
+/* 'Me': General Category */
+static const OnigCodePoint CR_Me[] = {
+  4,
+  0x0488, 0x0489,
+  0x06de, 0x06de,
+  0x20dd, 0x20e0,
+  0x20e2, 0x20e4
+}; /* CR_Me */
+
+/* 'Mn': General Category */
+static const OnigCodePoint CR_Mn[] = {
+  124,
+  0x0300, 0x036f,
+  0x0483, 0x0486,
+  0x0591, 0x05b9,
+  0x05bb, 0x05bd,
+  0x05bf, 0x05bf,
+  0x05c1, 0x05c2,
+  0x05c4, 0x05c5,
+  0x05c7, 0x05c7,
+  0x0610, 0x0615,
+  0x064b, 0x065e,
+  0x0670, 0x0670,
+  0x06d6, 0x06dc,
+  0x06df, 0x06e4,
+  0x06e7, 0x06e8,
+  0x06ea, 0x06ed,
+  0x0711, 0x0711,
+  0x0730, 0x074a,
+  0x07a6, 0x07b0,
+  0x0901, 0x0902,
+  0x093c, 0x093c,
+  0x0941, 0x0948,
+  0x094d, 0x094d,
+  0x0951, 0x0954,
+  0x0962, 0x0963,
+  0x0981, 0x0981,
+  0x09bc, 0x09bc,
+  0x09c1, 0x09c4,
+  0x09cd, 0x09cd,
+  0x09e2, 0x09e3,
+  0x0a01, 0x0a02,
+  0x0a3c, 0x0a3c,
+  0x0a41, 0x0a42,
+  0x0a47, 0x0a48,
+  0x0a4b, 0x0a4d,
+  0x0a70, 0x0a71,
+  0x0a81, 0x0a82,
+  0x0abc, 0x0abc,
+  0x0ac1, 0x0ac5,
+  0x0ac7, 0x0ac8,
+  0x0acd, 0x0acd,
+  0x0ae2, 0x0ae3,
+  0x0b01, 0x0b01,
+  0x0b3c, 0x0b3c,
+  0x0b3f, 0x0b3f,
+  0x0b41, 0x0b43,
+  0x0b4d, 0x0b4d,
+  0x0b56, 0x0b56,
+  0x0b82, 0x0b82,
+  0x0bc0, 0x0bc0,
+  0x0bcd, 0x0bcd,
+  0x0c3e, 0x0c40,
+  0x0c46, 0x0c48,
+  0x0c4a, 0x0c4d,
+  0x0c55, 0x0c56,
+  0x0cbc, 0x0cbc,
+  0x0cbf, 0x0cbf,
+  0x0cc6, 0x0cc6,
+  0x0ccc, 0x0ccd,
+  0x0d41, 0x0d43,
+  0x0d4d, 0x0d4d,
+  0x0dca, 0x0dca,
+  0x0dd2, 0x0dd4,
+  0x0dd6, 0x0dd6,
+  0x0e31, 0x0e31,
+  0x0e34, 0x0e3a,
+  0x0e47, 0x0e4e,
+  0x0eb1, 0x0eb1,
+  0x0eb4, 0x0eb9,
+  0x0ebb, 0x0ebc,
+  0x0ec8, 0x0ecd,
+  0x0f18, 0x0f19,
+  0x0f35, 0x0f35,
+  0x0f37, 0x0f37,
+  0x0f39, 0x0f39,
+  0x0f71, 0x0f7e,
+  0x0f80, 0x0f84,
+  0x0f86, 0x0f87,
+  0x0f90, 0x0f97,
+  0x0f99, 0x0fbc,
+  0x0fc6, 0x0fc6,
+  0x102d, 0x1030,
+  0x1032, 0x1032,
+  0x1036, 0x1037,
+  0x1039, 0x1039,
+  0x1058, 0x1059,
+  0x135f, 0x135f,
+  0x1712, 0x1714,
+  0x1732, 0x1734,
+  0x1752, 0x1753,
+  0x1772, 0x1773,
+  0x17b7, 0x17bd,
+  0x17c6, 0x17c6,
+  0x17c9, 0x17d3,
+  0x17dd, 0x17dd,
+  0x180b, 0x180d,
+  0x18a9, 0x18a9,
+  0x1920, 0x1922,
+  0x1927, 0x1928,
+  0x1932, 0x1932,
+  0x1939, 0x193b,
+  0x1a17, 0x1a18,
+  0x1dc0, 0x1dc3,
+  0x20d0, 0x20dc,
+  0x20e1, 0x20e1,
+  0x20e5, 0x20eb,
+  0x302a, 0x302f,
+  0x3099, 0x309a,
+  0xa806, 0xa806,
+  0xa80b, 0xa80b,
+  0xa825, 0xa826,
+  0xfb1e, 0xfb1e,
+  0xfe00, 0xfe0f,
+  0xfe20, 0xfe23,
+  0x10a01, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a0f,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a3f,
+  0x1d167, 0x1d169,
+  0x1d17b, 0x1d182,
+  0x1d185, 0x1d18b,
+  0x1d1aa, 0x1d1ad,
+  0x1d242, 0x1d244,
+  0xe0100, 0xe01ef
+}; /* CR_Mn */
+
+/* 'N': Major Category */
+static const OnigCodePoint CR_N[] = {
+  53,
+  0x0030, 0x0039,
+  0x00b2, 0x00b3,
+  0x00b9, 0x00b9,
+  0x00bc, 0x00be,
+  0x0660, 0x0669,
+  0x06f0, 0x06f9,
+  0x0966, 0x096f,
+  0x09e6, 0x09ef,
+  0x09f4, 0x09f9,
+  0x0a66, 0x0a6f,
+  0x0ae6, 0x0aef,
+  0x0b66, 0x0b6f,
+  0x0be6, 0x0bf2,
+  0x0c66, 0x0c6f,
+  0x0ce6, 0x0cef,
+  0x0d66, 0x0d6f,
+  0x0e50, 0x0e59,
+  0x0ed0, 0x0ed9,
+  0x0f20, 0x0f33,
+  0x1040, 0x1049,
+  0x1369, 0x137c,
+  0x16ee, 0x16f0,
+  0x17e0, 0x17e9,
+  0x17f0, 0x17f9,
+  0x1810, 0x1819,
+  0x1946, 0x194f,
+  0x19d0, 0x19d9,
+  0x2070, 0x2070,
+  0x2074, 0x2079,
+  0x2080, 0x2089,
+  0x2153, 0x2183,
+  0x2460, 0x249b,
+  0x24ea, 0x24ff,
+  0x2776, 0x2793,
+  0x2cfd, 0x2cfd,
+  0x3007, 0x3007,
+  0x3021, 0x3029,
+  0x3038, 0x303a,
+  0x3192, 0x3195,
+  0x3220, 0x3229,
+  0x3251, 0x325f,
+  0x3280, 0x3289,
+  0x32b1, 0x32bf,
+  0xff10, 0xff19,
+  0x10107, 0x10133,
+  0x10140, 0x10178,
+  0x1018a, 0x1018a,
+  0x10320, 0x10323,
+  0x1034a, 0x1034a,
+  0x103d1, 0x103d5,
+  0x104a0, 0x104a9,
+  0x10a40, 0x10a47,
+  0x1d7ce, 0x1d7ff
+}; /* CR_N */
+
+/* 'Nd': General Category */
+static const OnigCodePoint CR_Nd[] = {
+  23,
+  0x0030, 0x0039,
+  0x0660, 0x0669,
+  0x06f0, 0x06f9,
+  0x0966, 0x096f,
+  0x09e6, 0x09ef,
+  0x0a66, 0x0a6f,
+  0x0ae6, 0x0aef,
+  0x0b66, 0x0b6f,
+  0x0be6, 0x0bef,
+  0x0c66, 0x0c6f,
+  0x0ce6, 0x0cef,
+  0x0d66, 0x0d6f,
+  0x0e50, 0x0e59,
+  0x0ed0, 0x0ed9,
+  0x0f20, 0x0f29,
+  0x1040, 0x1049,
+  0x17e0, 0x17e9,
+  0x1810, 0x1819,
+  0x1946, 0x194f,
+  0x19d0, 0x19d9,
+  0xff10, 0xff19,
+  0x104a0, 0x104a9,
+  0x1d7ce, 0x1d7ff
+}; /* CR_Nd */
+
+/* 'Nl': General Category */
+static const OnigCodePoint CR_Nl[] = {
+  8,
+  0x16ee, 0x16f0,
+  0x2160, 0x2183,
+  0x3007, 0x3007,
+  0x3021, 0x3029,
+  0x3038, 0x303a,
+  0x10140, 0x10174,
+  0x1034a, 0x1034a,
+  0x103d1, 0x103d5
+}; /* CR_Nl */
+
+/* 'No': General Category */
+static const OnigCodePoint CR_No[] = {
+  26,
+  0x00b2, 0x00b3,
+  0x00b9, 0x00b9,
+  0x00bc, 0x00be,
+  0x09f4, 0x09f9,
+  0x0bf0, 0x0bf2,
+  0x0f2a, 0x0f33,
+  0x1369, 0x137c,
+  0x17f0, 0x17f9,
+  0x2070, 0x2070,
+  0x2074, 0x2079,
+  0x2080, 0x2089,
+  0x2153, 0x215f,
+  0x2460, 0x249b,
+  0x24ea, 0x24ff,
+  0x2776, 0x2793,
+  0x2cfd, 0x2cfd,
+  0x3192, 0x3195,
+  0x3220, 0x3229,
+  0x3251, 0x325f,
+  0x3280, 0x3289,
+  0x32b1, 0x32bf,
+  0x10107, 0x10133,
+  0x10175, 0x10178,
+  0x1018a, 0x1018a,
+  0x10320, 0x10323,
+  0x10a40, 0x10a47
+}; /* CR_No */
+
+/* 'P': Major Category */
+static const OnigCodePoint CR_P[] = {
+  96,
+  0x0021, 0x0023,
+  0x0025, 0x002a,
+  0x002c, 0x002f,
+  0x003a, 0x003b,
+  0x003f, 0x0040,
+  0x005b, 0x005d,
+  0x005f, 0x005f,
+  0x007b, 0x007b,
+  0x007d, 0x007d,
+  0x00a1, 0x00a1,
+  0x00ab, 0x00ab,
+  0x00b7, 0x00b7,
+  0x00bb, 0x00bb,
+  0x00bf, 0x00bf,
+  0x037e, 0x037e,
+  0x0387, 0x0387,
+  0x055a, 0x055f,
+  0x0589, 0x058a,
+  0x05be, 0x05be,
+  0x05c0, 0x05c0,
+  0x05c3, 0x05c3,
+  0x05c6, 0x05c6,
+  0x05f3, 0x05f4,
+  0x060c, 0x060d,
+  0x061b, 0x061b,
+  0x061e, 0x061f,
+  0x066a, 0x066d,
+  0x06d4, 0x06d4,
+  0x0700, 0x070d,
+  0x0964, 0x0965,
+  0x0970, 0x0970,
+  0x0df4, 0x0df4,
+  0x0e4f, 0x0e4f,
+  0x0e5a, 0x0e5b,
+  0x0f04, 0x0f12,
+  0x0f3a, 0x0f3d,
+  0x0f85, 0x0f85,
+  0x0fd0, 0x0fd1,
+  0x104a, 0x104f,
+  0x10fb, 0x10fb,
+  0x1361, 0x1368,
+  0x166d, 0x166e,
+  0x169b, 0x169c,
+  0x16eb, 0x16ed,
+  0x1735, 0x1736,
+  0x17d4, 0x17d6,
+  0x17d8, 0x17da,
+  0x1800, 0x180a,
+  0x1944, 0x1945,
+  0x19de, 0x19df,
+  0x1a1e, 0x1a1f,
+  0x2010, 0x2027,
+  0x2030, 0x2043,
+  0x2045, 0x2051,
+  0x2053, 0x205e,
+  0x207d, 0x207e,
+  0x208d, 0x208e,
+  0x2329, 0x232a,
+  0x23b4, 0x23b6,
+  0x2768, 0x2775,
+  0x27c5, 0x27c6,
+  0x27e6, 0x27eb,
+  0x2983, 0x2998,
+  0x29d8, 0x29db,
+  0x29fc, 0x29fd,
+  0x2cf9, 0x2cfc,
+  0x2cfe, 0x2cff,
+  0x2e00, 0x2e17,
+  0x2e1c, 0x2e1d,
+  0x3001, 0x3003,
+  0x3008, 0x3011,
+  0x3014, 0x301f,
+  0x3030, 0x3030,
+  0x303d, 0x303d,
+  0x30a0, 0x30a0,
+  0x30fb, 0x30fb,
+  0xfd3e, 0xfd3f,
+  0xfe10, 0xfe19,
+  0xfe30, 0xfe52,
+  0xfe54, 0xfe61,
+  0xfe63, 0xfe63,
+  0xfe68, 0xfe68,
+  0xfe6a, 0xfe6b,
+  0xff01, 0xff03,
+  0xff05, 0xff0a,
+  0xff0c, 0xff0f,
+  0xff1a, 0xff1b,
+  0xff1f, 0xff20,
+  0xff3b, 0xff3d,
+  0xff3f, 0xff3f,
+  0xff5b, 0xff5b,
+  0xff5d, 0xff5d,
+  0xff5f, 0xff65,
+  0x10100, 0x10101,
+  0x1039f, 0x1039f,
+  0x10a50, 0x10a58
+}; /* CR_P */
+
+/* 'Pc': General Category */
+static const OnigCodePoint CR_Pc[] = {
+  6,
+  0x005f, 0x005f,
+  0x203f, 0x2040,
+  0x2054, 0x2054,
+  0xfe33, 0xfe34,
+  0xfe4d, 0xfe4f,
+  0xff3f, 0xff3f
+}; /* CR_Pc */
+
+/* 'Pd': General Category */
+static const OnigCodePoint CR_Pd[] = {
+  12,
+  0x002d, 0x002d,
+  0x058a, 0x058a,
+  0x1806, 0x1806,
+  0x2010, 0x2015,
+  0x2e17, 0x2e17,
+  0x301c, 0x301c,
+  0x3030, 0x3030,
+  0x30a0, 0x30a0,
+  0xfe31, 0xfe32,
+  0xfe58, 0xfe58,
+  0xfe63, 0xfe63,
+  0xff0d, 0xff0d
+}; /* CR_Pd */
+
+/* 'Pe': General Category */
+static const OnigCodePoint CR_Pe[] = {
+  65,
+  0x0029, 0x0029,
+  0x005d, 0x005d,
+  0x007d, 0x007d,
+  0x0f3b, 0x0f3b,
+  0x0f3d, 0x0f3d,
+  0x169c, 0x169c,
+  0x2046, 0x2046,
+  0x207e, 0x207e,
+  0x208e, 0x208e,
+  0x232a, 0x232a,
+  0x23b5, 0x23b5,
+  0x2769, 0x2769,
+  0x276b, 0x276b,
+  0x276d, 0x276d,
+  0x276f, 0x276f,
+  0x2771, 0x2771,
+  0x2773, 0x2773,
+  0x2775, 0x2775,
+  0x27c6, 0x27c6,
+  0x27e7, 0x27e7,
+  0x27e9, 0x27e9,
+  0x27eb, 0x27eb,
+  0x2984, 0x2984,
+  0x2986, 0x2986,
+  0x2988, 0x2988,
+  0x298a, 0x298a,
+  0x298c, 0x298c,
+  0x298e, 0x298e,
+  0x2990, 0x2990,
+  0x2992, 0x2992,
+  0x2994, 0x2994,
+  0x2996, 0x2996,
+  0x2998, 0x2998,
+  0x29d9, 0x29d9,
+  0x29db, 0x29db,
+  0x29fd, 0x29fd,
+  0x3009, 0x3009,
+  0x300b, 0x300b,
+  0x300d, 0x300d,
+  0x300f, 0x300f,
+  0x3011, 0x3011,
+  0x3015, 0x3015,
+  0x3017, 0x3017,
+  0x3019, 0x3019,
+  0x301b, 0x301b,
+  0x301e, 0x301f,
+  0xfd3f, 0xfd3f,
+  0xfe18, 0xfe18,
+  0xfe36, 0xfe36,
+  0xfe38, 0xfe38,
+  0xfe3a, 0xfe3a,
+  0xfe3c, 0xfe3c,
+  0xfe3e, 0xfe3e,
+  0xfe40, 0xfe40,
+  0xfe42, 0xfe42,
+  0xfe44, 0xfe44,
+  0xfe48, 0xfe48,
+  0xfe5a, 0xfe5a,
+  0xfe5c, 0xfe5c,
+  0xfe5e, 0xfe5e,
+  0xff09, 0xff09,
+  0xff3d, 0xff3d,
+  0xff5d, 0xff5d,
+  0xff60, 0xff60,
+  0xff63, 0xff63
+}; /* CR_Pe */
+
+/* 'Pf': General Category */
+static const OnigCodePoint CR_Pf[] = {
+  9,
+  0x00bb, 0x00bb,
+  0x2019, 0x2019,
+  0x201d, 0x201d,
+  0x203a, 0x203a,
+  0x2e03, 0x2e03,
+  0x2e05, 0x2e05,
+  0x2e0a, 0x2e0a,
+  0x2e0d, 0x2e0d,
+  0x2e1d, 0x2e1d
+}; /* CR_Pf */
+
+/* 'Pi': General Category */
+static const OnigCodePoint CR_Pi[] = {
+  10,
+  0x00ab, 0x00ab,
+  0x2018, 0x2018,
+  0x201b, 0x201c,
+  0x201f, 0x201f,
+  0x2039, 0x2039,
+  0x2e02, 0x2e02,
+  0x2e04, 0x2e04,
+  0x2e09, 0x2e09,
+  0x2e0c, 0x2e0c,
+  0x2e1c, 0x2e1c
+}; /* CR_Pi */
+
+/* 'Po': General Category */
+static const OnigCodePoint CR_Po[] = {
+  88,
+  0x0021, 0x0023,
+  0x0025, 0x0027,
+  0x002a, 0x002a,
+  0x002c, 0x002c,
+  0x002e, 0x002f,
+  0x003a, 0x003b,
+  0x003f, 0x0040,
+  0x005c, 0x005c,
+  0x00a1, 0x00a1,
+  0x00b7, 0x00b7,
+  0x00bf, 0x00bf,
+  0x037e, 0x037e,
+  0x0387, 0x0387,
+  0x055a, 0x055f,
+  0x0589, 0x0589,
+  0x05be, 0x05be,
+  0x05c0, 0x05c0,
+  0x05c3, 0x05c3,
+  0x05c6, 0x05c6,
+  0x05f3, 0x05f4,
+  0x060c, 0x060d,
+  0x061b, 0x061b,
+  0x061e, 0x061f,
+  0x066a, 0x066d,
+  0x06d4, 0x06d4,
+  0x0700, 0x070d,
+  0x0964, 0x0965,
+  0x0970, 0x0970,
+  0x0df4, 0x0df4,
+  0x0e4f, 0x0e4f,
+  0x0e5a, 0x0e5b,
+  0x0f04, 0x0f12,
+  0x0f85, 0x0f85,
+  0x0fd0, 0x0fd1,
+  0x104a, 0x104f,
+  0x10fb, 0x10fb,
+  0x1361, 0x1368,
+  0x166d, 0x166e,
+  0x16eb, 0x16ed,
+  0x1735, 0x1736,
+  0x17d4, 0x17d6,
+  0x17d8, 0x17da,
+  0x1800, 0x1805,
+  0x1807, 0x180a,
+  0x1944, 0x1945,
+  0x19de, 0x19df,
+  0x1a1e, 0x1a1f,
+  0x2016, 0x2017,
+  0x2020, 0x2027,
+  0x2030, 0x2038,
+  0x203b, 0x203e,
+  0x2041, 0x2043,
+  0x2047, 0x2051,
+  0x2053, 0x2053,
+  0x2055, 0x205e,
+  0x23b6, 0x23b6,
+  0x2cf9, 0x2cfc,
+  0x2cfe, 0x2cff,
+  0x2e00, 0x2e01,
+  0x2e06, 0x2e08,
+  0x2e0b, 0x2e0b,
+  0x2e0e, 0x2e16,
+  0x3001, 0x3003,
+  0x303d, 0x303d,
+  0x30fb, 0x30fb,
+  0xfe10, 0xfe16,
+  0xfe19, 0xfe19,
+  0xfe30, 0xfe30,
+  0xfe45, 0xfe46,
+  0xfe49, 0xfe4c,
+  0xfe50, 0xfe52,
+  0xfe54, 0xfe57,
+  0xfe5f, 0xfe61,
+  0xfe68, 0xfe68,
+  0xfe6a, 0xfe6b,
+  0xff01, 0xff03,
+  0xff05, 0xff07,
+  0xff0a, 0xff0a,
+  0xff0c, 0xff0c,
+  0xff0e, 0xff0f,
+  0xff1a, 0xff1b,
+  0xff1f, 0xff20,
+  0xff3c, 0xff3c,
+  0xff61, 0xff61,
+  0xff64, 0xff65,
+  0x10100, 0x10101,
+  0x1039f, 0x1039f,
+  0x10a50, 0x10a58
+}; /* CR_Po */
+
+/* 'Ps': General Category */
+static const OnigCodePoint CR_Ps[] = {
+  67,
+  0x0028, 0x0028,
+  0x005b, 0x005b,
+  0x007b, 0x007b,
+  0x0f3a, 0x0f3a,
+  0x0f3c, 0x0f3c,
+  0x169b, 0x169b,
+  0x201a, 0x201a,
+  0x201e, 0x201e,
+  0x2045, 0x2045,
+  0x207d, 0x207d,
+  0x208d, 0x208d,
+  0x2329, 0x2329,
+  0x23b4, 0x23b4,
+  0x2768, 0x2768,
+  0x276a, 0x276a,
+  0x276c, 0x276c,
+  0x276e, 0x276e,
+  0x2770, 0x2770,
+  0x2772, 0x2772,
+  0x2774, 0x2774,
+  0x27c5, 0x27c5,
+  0x27e6, 0x27e6,
+  0x27e8, 0x27e8,
+  0x27ea, 0x27ea,
+  0x2983, 0x2983,
+  0x2985, 0x2985,
+  0x2987, 0x2987,
+  0x2989, 0x2989,
+  0x298b, 0x298b,
+  0x298d, 0x298d,
+  0x298f, 0x298f,
+  0x2991, 0x2991,
+  0x2993, 0x2993,
+  0x2995, 0x2995,
+  0x2997, 0x2997,
+  0x29d8, 0x29d8,
+  0x29da, 0x29da,
+  0x29fc, 0x29fc,
+  0x3008, 0x3008,
+  0x300a, 0x300a,
+  0x300c, 0x300c,
+  0x300e, 0x300e,
+  0x3010, 0x3010,
+  0x3014, 0x3014,
+  0x3016, 0x3016,
+  0x3018, 0x3018,
+  0x301a, 0x301a,
+  0x301d, 0x301d,
+  0xfd3e, 0xfd3e,
+  0xfe17, 0xfe17,
+  0xfe35, 0xfe35,
+  0xfe37, 0xfe37,
+  0xfe39, 0xfe39,
+  0xfe3b, 0xfe3b,
+  0xfe3d, 0xfe3d,
+  0xfe3f, 0xfe3f,
+  0xfe41, 0xfe41,
+  0xfe43, 0xfe43,
+  0xfe47, 0xfe47,
+  0xfe59, 0xfe59,
+  0xfe5b, 0xfe5b,
+  0xfe5d, 0xfe5d,
+  0xff08, 0xff08,
+  0xff3b, 0xff3b,
+  0xff5b, 0xff5b,
+  0xff5f, 0xff5f,
+  0xff62, 0xff62
+}; /* CR_Ps */
+
+/* 'S': Major Category */
+static const OnigCodePoint CR_S[] = {
+  162,
+  0x0024, 0x0024,
+  0x002b, 0x002b,
+  0x003c, 0x003e,
+  0x005e, 0x005e,
+  0x0060, 0x0060,
+  0x007c, 0x007c,
+  0x007e, 0x007e,
+  0x00a2, 0x00a9,
+  0x00ac, 0x00ac,
+  0x00ae, 0x00b1,
+  0x00b4, 0x00b4,
+  0x00b6, 0x00b6,
+  0x00b8, 0x00b8,
+  0x00d7, 0x00d7,
+  0x00f7, 0x00f7,
+  0x02c2, 0x02c5,
+  0x02d2, 0x02df,
+  0x02e5, 0x02ed,
+  0x02ef, 0x02ff,
+  0x0374, 0x0375,
+  0x0384, 0x0385,
+  0x03f6, 0x03f6,
+  0x0482, 0x0482,
+  0x060b, 0x060b,
+  0x060e, 0x060f,
+  0x06e9, 0x06e9,
+  0x06fd, 0x06fe,
+  0x09f2, 0x09f3,
+  0x09fa, 0x09fa,
+  0x0af1, 0x0af1,
+  0x0b70, 0x0b70,
+  0x0bf3, 0x0bfa,
+  0x0e3f, 0x0e3f,
+  0x0f01, 0x0f03,
+  0x0f13, 0x0f17,
+  0x0f1a, 0x0f1f,
+  0x0f34, 0x0f34,
+  0x0f36, 0x0f36,
+  0x0f38, 0x0f38,
+  0x0fbe, 0x0fc5,
+  0x0fc7, 0x0fcc,
+  0x0fcf, 0x0fcf,
+  0x1360, 0x1360,
+  0x1390, 0x1399,
+  0x17db, 0x17db,
+  0x1940, 0x1940,
+  0x19e0, 0x19ff,
+  0x1fbd, 0x1fbd,
+  0x1fbf, 0x1fc1,
+  0x1fcd, 0x1fcf,
+  0x1fdd, 0x1fdf,
+  0x1fed, 0x1fef,
+  0x1ffd, 0x1ffe,
+  0x2044, 0x2044,
+  0x2052, 0x2052,
+  0x207a, 0x207c,
+  0x208a, 0x208c,
+  0x20a0, 0x20b5,
+  0x2100, 0x2101,
+  0x2103, 0x2106,
+  0x2108, 0x2109,
+  0x2114, 0x2114,
+  0x2116, 0x2118,
+  0x211e, 0x2123,
+  0x2125, 0x2125,
+  0x2127, 0x2127,
+  0x2129, 0x2129,
+  0x212e, 0x212e,
+  0x2132, 0x2132,
+  0x213a, 0x213b,
+  0x2140, 0x2144,
+  0x214a, 0x214c,
+  0x2190, 0x2328,
+  0x232b, 0x23b3,
+  0x23b7, 0x23db,
+  0x2400, 0x2426,
+  0x2440, 0x244a,
+  0x249c, 0x24e9,
+  0x2500, 0x269c,
+  0x26a0, 0x26b1,
+  0x2701, 0x2704,
+  0x2706, 0x2709,
+  0x270c, 0x2727,
+  0x2729, 0x274b,
+  0x274d, 0x274d,
+  0x274f, 0x2752,
+  0x2756, 0x2756,
+  0x2758, 0x275e,
+  0x2761, 0x2767,
+  0x2794, 0x2794,
+  0x2798, 0x27af,
+  0x27b1, 0x27be,
+  0x27c0, 0x27c4,
+  0x27d0, 0x27e5,
+  0x27f0, 0x2982,
+  0x2999, 0x29d7,
+  0x29dc, 0x29fb,
+  0x29fe, 0x2b13,
+  0x2ce5, 0x2cea,
+  0x2e80, 0x2e99,
+  0x2e9b, 0x2ef3,
+  0x2f00, 0x2fd5,
+  0x2ff0, 0x2ffb,
+  0x3004, 0x3004,
+  0x3012, 0x3013,
+  0x3020, 0x3020,
+  0x3036, 0x3037,
+  0x303e, 0x303f,
+  0x309b, 0x309c,
+  0x3190, 0x3191,
+  0x3196, 0x319f,
+  0x31c0, 0x31cf,
+  0x3200, 0x321e,
+  0x322a, 0x3243,
+  0x3250, 0x3250,
+  0x3260, 0x327f,
+  0x328a, 0x32b0,
+  0x32c0, 0x32fe,
+  0x3300, 0x33ff,
+  0x4dc0, 0x4dff,
+  0xa490, 0xa4c6,
+  0xa700, 0xa716,
+  0xa828, 0xa82b,
+  0xfb29, 0xfb29,
+  0xfdfc, 0xfdfd,
+  0xfe62, 0xfe62,
+  0xfe64, 0xfe66,
+  0xfe69, 0xfe69,
+  0xff04, 0xff04,
+  0xff0b, 0xff0b,
+  0xff1c, 0xff1e,
+  0xff3e, 0xff3e,
+  0xff40, 0xff40,
+  0xff5c, 0xff5c,
+  0xff5e, 0xff5e,
+  0xffe0, 0xffe6,
+  0xffe8, 0xffee,
+  0xfffc, 0xfffd,
+  0x10102, 0x10102,
+  0x10137, 0x1013f,
+  0x10179, 0x10189,
+  0x103d0, 0x103d0,
+  0x1d000, 0x1d0f5,
+  0x1d100, 0x1d126,
+  0x1d12a, 0x1d164,
+  0x1d16a, 0x1d16c,
+  0x1d183, 0x1d184,
+  0x1d18c, 0x1d1a9,
+  0x1d1ae, 0x1d1dd,
+  0x1d200, 0x1d241,
+  0x1d245, 0x1d245,
+  0x1d300, 0x1d356,
+  0x1d6c1, 0x1d6c1,
+  0x1d6db, 0x1d6db,
+  0x1d6fb, 0x1d6fb,
+  0x1d715, 0x1d715,
+  0x1d735, 0x1d735,
+  0x1d74f, 0x1d74f,
+  0x1d76f, 0x1d76f,
+  0x1d789, 0x1d789,
+  0x1d7a9, 0x1d7a9,
+  0x1d7c3, 0x1d7c3
+}; /* CR_S */
+
+/* 'Sc': General Category */
+static const OnigCodePoint CR_Sc[] = {
+  14,
+  0x0024, 0x0024,
+  0x00a2, 0x00a5,
+  0x060b, 0x060b,
+  0x09f2, 0x09f3,
+  0x0af1, 0x0af1,
+  0x0bf9, 0x0bf9,
+  0x0e3f, 0x0e3f,
+  0x17db, 0x17db,
+  0x20a0, 0x20b5,
+  0xfdfc, 0xfdfc,
+  0xfe69, 0xfe69,
+  0xff04, 0xff04,
+  0xffe0, 0xffe1,
+  0xffe5, 0xffe6
+}; /* CR_Sc */
+
+/* 'Sk': General Category */
+static const OnigCodePoint CR_Sk[] = {
+  23,
+  0x005e, 0x005e,
+  0x0060, 0x0060,
+  0x00a8, 0x00a8,
+  0x00af, 0x00af,
+  0x00b4, 0x00b4,
+  0x00b8, 0x00b8,
+  0x02c2, 0x02c5,
+  0x02d2, 0x02df,
+  0x02e5, 0x02ed,
+  0x02ef, 0x02ff,
+  0x0374, 0x0375,
+  0x0384, 0x0385,
+  0x1fbd, 0x1fbd,
+  0x1fbf, 0x1fc1,
+  0x1fcd, 0x1fcf,
+  0x1fdd, 0x1fdf,
+  0x1fed, 0x1fef,
+  0x1ffd, 0x1ffe,
+  0x309b, 0x309c,
+  0xa700, 0xa716,
+  0xff3e, 0xff3e,
+  0xff40, 0xff40,
+  0xffe3, 0xffe3
+}; /* CR_Sk */
+
+/* 'Sm': General Category */
+static const OnigCodePoint CR_Sm[] = {
+  59,
+  0x002b, 0x002b,
+  0x003c, 0x003e,
+  0x007c, 0x007c,
+  0x007e, 0x007e,
+  0x00ac, 0x00ac,
+  0x00b1, 0x00b1,
+  0x00d7, 0x00d7,
+  0x00f7, 0x00f7,
+  0x03f6, 0x03f6,
+  0x2044, 0x2044,
+  0x2052, 0x2052,
+  0x207a, 0x207c,
+  0x208a, 0x208c,
+  0x2140, 0x2144,
+  0x214b, 0x214b,
+  0x2190, 0x2194,
+  0x219a, 0x219b,
+  0x21a0, 0x21a0,
+  0x21a3, 0x21a3,
+  0x21a6, 0x21a6,
+  0x21ae, 0x21ae,
+  0x21ce, 0x21cf,
+  0x21d2, 0x21d2,
+  0x21d4, 0x21d4,
+  0x21f4, 0x22ff,
+  0x2308, 0x230b,
+  0x2320, 0x2321,
+  0x237c, 0x237c,
+  0x239b, 0x23b3,
+  0x25b7, 0x25b7,
+  0x25c1, 0x25c1,
+  0x25f8, 0x25ff,
+  0x266f, 0x266f,
+  0x27c0, 0x27c4,
+  0x27d0, 0x27e5,
+  0x27f0, 0x27ff,
+  0x2900, 0x2982,
+  0x2999, 0x29d7,
+  0x29dc, 0x29fb,
+  0x29fe, 0x2aff,
+  0xfb29, 0xfb29,
+  0xfe62, 0xfe62,
+  0xfe64, 0xfe66,
+  0xff0b, 0xff0b,
+  0xff1c, 0xff1e,
+  0xff5c, 0xff5c,
+  0xff5e, 0xff5e,
+  0xffe2, 0xffe2,
+  0xffe9, 0xffec,
+  0x1d6c1, 0x1d6c1,
+  0x1d6db, 0x1d6db,
+  0x1d6fb, 0x1d6fb,
+  0x1d715, 0x1d715,
+  0x1d735, 0x1d735,
+  0x1d74f, 0x1d74f,
+  0x1d76f, 0x1d76f,
+  0x1d789, 0x1d789,
+  0x1d7a9, 0x1d7a9,
+  0x1d7c3, 0x1d7c3
+}; /* CR_Sm */
+
+/* 'So': General Category */
+static const OnigCodePoint CR_So[] = {
+  120,
+  0x00a6, 0x00a7,
+  0x00a9, 0x00a9,
+  0x00ae, 0x00ae,
+  0x00b0, 0x00b0,
+  0x00b6, 0x00b6,
+  0x0482, 0x0482,
+  0x060e, 0x060f,
+  0x06e9, 0x06e9,
+  0x06fd, 0x06fe,
+  0x09fa, 0x09fa,
+  0x0b70, 0x0b70,
+  0x0bf3, 0x0bf8,
+  0x0bfa, 0x0bfa,
+  0x0f01, 0x0f03,
+  0x0f13, 0x0f17,
+  0x0f1a, 0x0f1f,
+  0x0f34, 0x0f34,
+  0x0f36, 0x0f36,
+  0x0f38, 0x0f38,
+  0x0fbe, 0x0fc5,
+  0x0fc7, 0x0fcc,
+  0x0fcf, 0x0fcf,
+  0x1360, 0x1360,
+  0x1390, 0x1399,
+  0x1940, 0x1940,
+  0x19e0, 0x19ff,
+  0x2100, 0x2101,
+  0x2103, 0x2106,
+  0x2108, 0x2109,
+  0x2114, 0x2114,
+  0x2116, 0x2118,
+  0x211e, 0x2123,
+  0x2125, 0x2125,
+  0x2127, 0x2127,
+  0x2129, 0x2129,
+  0x212e, 0x212e,
+  0x2132, 0x2132,
+  0x213a, 0x213b,
+  0x214a, 0x214a,
+  0x214c, 0x214c,
+  0x2195, 0x2199,
+  0x219c, 0x219f,
+  0x21a1, 0x21a2,
+  0x21a4, 0x21a5,
+  0x21a7, 0x21ad,
+  0x21af, 0x21cd,
+  0x21d0, 0x21d1,
+  0x21d3, 0x21d3,
+  0x21d5, 0x21f3,
+  0x2300, 0x2307,
+  0x230c, 0x231f,
+  0x2322, 0x2328,
+  0x232b, 0x237b,
+  0x237d, 0x239a,
+  0x23b7, 0x23db,
+  0x2400, 0x2426,
+  0x2440, 0x244a,
+  0x249c, 0x24e9,
+  0x2500, 0x25b6,
+  0x25b8, 0x25c0,
+  0x25c2, 0x25f7,
+  0x2600, 0x266e,
+  0x2670, 0x269c,
+  0x26a0, 0x26b1,
+  0x2701, 0x2704,
+  0x2706, 0x2709,
+  0x270c, 0x2727,
+  0x2729, 0x274b,
+  0x274d, 0x274d,
+  0x274f, 0x2752,
+  0x2756, 0x2756,
+  0x2758, 0x275e,
+  0x2761, 0x2767,
+  0x2794, 0x2794,
+  0x2798, 0x27af,
+  0x27b1, 0x27be,
+  0x2800, 0x28ff,
+  0x2b00, 0x2b13,
+  0x2ce5, 0x2cea,
+  0x2e80, 0x2e99,
+  0x2e9b, 0x2ef3,
+  0x2f00, 0x2fd5,
+  0x2ff0, 0x2ffb,
+  0x3004, 0x3004,
+  0x3012, 0x3013,
+  0x3020, 0x3020,
+  0x3036, 0x3037,
+  0x303e, 0x303f,
+  0x3190, 0x3191,
+  0x3196, 0x319f,
+  0x31c0, 0x31cf,
+  0x3200, 0x321e,
+  0x322a, 0x3243,
+  0x3250, 0x3250,
+  0x3260, 0x327f,
+  0x328a, 0x32b0,
+  0x32c0, 0x32fe,
+  0x3300, 0x33ff,
+  0x4dc0, 0x4dff,
+  0xa490, 0xa4c6,
+  0xa828, 0xa82b,
+  0xfdfd, 0xfdfd,
+  0xffe4, 0xffe4,
+  0xffe8, 0xffe8,
+  0xffed, 0xffee,
+  0xfffc, 0xfffd,
+  0x10102, 0x10102,
+  0x10137, 0x1013f,
+  0x10179, 0x10189,
+  0x103d0, 0x103d0,
+  0x1d000, 0x1d0f5,
+  0x1d100, 0x1d126,
+  0x1d12a, 0x1d164,
+  0x1d16a, 0x1d16c,
+  0x1d183, 0x1d184,
+  0x1d18c, 0x1d1a9,
+  0x1d1ae, 0x1d1dd,
+  0x1d200, 0x1d241,
+  0x1d245, 0x1d245,
+  0x1d300, 0x1d356
+}; /* CR_So */
+
+/* 'Z': Major Category */
+static const OnigCodePoint CR_Z[] = {
+  9,
+  0x0020, 0x0020,
+  0x00a0, 0x00a0,
+  0x1680, 0x1680,
+  0x180e, 0x180e,
+  0x2000, 0x200a,
+  0x2028, 0x2029,
+  0x202f, 0x202f,
+  0x205f, 0x205f,
+  0x3000, 0x3000
+}; /* CR_Z */
+
+/* 'Zl': General Category */
+static const OnigCodePoint CR_Zl[] = {
+  1,
+  0x2028, 0x2028
+}; /* CR_Zl */
+
+/* 'Zp': General Category */
+static const OnigCodePoint CR_Zp[] = {
+  1,
+  0x2029, 0x2029
+}; /* CR_Zp */
+
+/* 'Zs': General Category */
+static const OnigCodePoint CR_Zs[] = {
+  8,
+  0x0020, 0x0020,
+  0x00a0, 0x00a0,
+  0x1680, 0x1680,
+  0x180e, 0x180e,
+  0x2000, 0x200a,
+  0x202f, 0x202f,
+  0x205f, 0x205f,
+  0x3000, 0x3000
+}; /* CR_Zs */
+
+/* 'Arabic': Script */
+static const OnigCodePoint CR_Arabic[] = {
+  17,
+  0x060b, 0x060b,
+  0x060d, 0x0615,
+  0x061e, 0x061e,
+  0x0621, 0x063a,
+  0x0641, 0x064a,
+  0x0656, 0x065e,
+  0x066a, 0x066f,
+  0x0671, 0x06dc,
+  0x06de, 0x06ff,
+  0x0750, 0x076d,
+  0xfb50, 0xfbb1,
+  0xfbd3, 0xfd3d,
+  0xfd50, 0xfd8f,
+  0xfd92, 0xfdc7,
+  0xfdf0, 0xfdfc,
+  0xfe70, 0xfe74,
+  0xfe76, 0xfefc
+}; /* CR_Arabic */
+
+/* 'Armenian': Script */
+static const OnigCodePoint CR_Armenian[] = {
+  5,
+  0x0531, 0x0556,
+  0x0559, 0x055f,
+  0x0561, 0x0587,
+  0x058a, 0x058a,
+  0xfb13, 0xfb17
+}; /* CR_Armenian */
+
+/* 'Bengali': Script */
+static const OnigCodePoint CR_Bengali[] = {
+  14,
+  0x0981, 0x0983,
+  0x0985, 0x098c,
+  0x098f, 0x0990,
+  0x0993, 0x09a8,
+  0x09aa, 0x09b0,
+  0x09b2, 0x09b2,
+  0x09b6, 0x09b9,
+  0x09bc, 0x09c4,
+  0x09c7, 0x09c8,
+  0x09cb, 0x09ce,
+  0x09d7, 0x09d7,
+  0x09dc, 0x09dd,
+  0x09df, 0x09e3,
+  0x09e6, 0x09fa
+}; /* CR_Bengali */
+
+/* 'Bopomofo': Script */
+static const OnigCodePoint CR_Bopomofo[] = {
+  2,
+  0x3105, 0x312c,
+  0x31a0, 0x31b7
+}; /* CR_Bopomofo */
+
+/* 'Braille': Script */
+static const OnigCodePoint CR_Braille[] = {
+  1,
+  0x2800, 0x28ff
+}; /* CR_Braille */
+
+/* 'Buginese': Script */
+static const OnigCodePoint CR_Buginese[] = {
+  2,
+  0x1a00, 0x1a1b,
+  0x1a1e, 0x1a1f
+}; /* CR_Buginese */
+
+/* 'Buhid': Script */
+static const OnigCodePoint CR_Buhid[] = {
+  1,
+  0x1740, 0x1753
+}; /* CR_Buhid */
+
+/* 'Canadian_Aboriginal': Script */
+static const OnigCodePoint CR_Canadian_Aboriginal[] = {
+  1,
+  0x1401, 0x1676
+}; /* CR_Canadian_Aboriginal */
+
+/* 'Cherokee': Script */
+static const OnigCodePoint CR_Cherokee[] = {
+  1,
+  0x13a0, 0x13f4
+}; /* CR_Cherokee */
+
+/* 'Common': Script */
+static const OnigCodePoint CR_Common[] = {
+  126,
+  0x0000, 0x0040,
+  0x005b, 0x0060,
+  0x007b, 0x00a9,
+  0x00ab, 0x00b9,
+  0x00bb, 0x00bf,
+  0x00d7, 0x00d7,
+  0x00f7, 0x00f7,
+  0x02b9, 0x02df,
+  0x02e5, 0x02ff,
+  0x037e, 0x037e,
+  0x0387, 0x0387,
+  0x0589, 0x0589,
+  0x0600, 0x0603,
+  0x060c, 0x060c,
+  0x061b, 0x061b,
+  0x061f, 0x061f,
+  0x0640, 0x0640,
+  0x0660, 0x0669,
+  0x06dd, 0x06dd,
+  0x0964, 0x0965,
+  0x0970, 0x0970,
+  0x0e3f, 0x0e3f,
+  0x10fb, 0x10fb,
+  0x16eb, 0x16ed,
+  0x1735, 0x1736,
+  0x2000, 0x200b,
+  0x200e, 0x2063,
+  0x206a, 0x2070,
+  0x2074, 0x207e,
+  0x2080, 0x208e,
+  0x20a0, 0x20b5,
+  0x2100, 0x2125,
+  0x2127, 0x2129,
+  0x212c, 0x214c,
+  0x2153, 0x2183,
+  0x2190, 0x23db,
+  0x2400, 0x2426,
+  0x2440, 0x244a,
+  0x2460, 0x269c,
+  0x26a0, 0x26b1,
+  0x2701, 0x2704,
+  0x2706, 0x2709,
+  0x270c, 0x2727,
+  0x2729, 0x274b,
+  0x274d, 0x274d,
+  0x274f, 0x2752,
+  0x2756, 0x2756,
+  0x2758, 0x275e,
+  0x2761, 0x2794,
+  0x2798, 0x27af,
+  0x27b1, 0x27be,
+  0x27c0, 0x27c6,
+  0x27d0, 0x27eb,
+  0x27f0, 0x27ff,
+  0x2900, 0x2b13,
+  0x2e00, 0x2e17,
+  0x2e1c, 0x2e1d,
+  0x2ff0, 0x2ffb,
+  0x3000, 0x3004,
+  0x3006, 0x3006,
+  0x3008, 0x3020,
+  0x3030, 0x3037,
+  0x303c, 0x303f,
+  0x309b, 0x309c,
+  0x30a0, 0x30a0,
+  0x30fb, 0x30fc,
+  0x3190, 0x319f,
+  0x31c0, 0x31cf,
+  0x3220, 0x3243,
+  0x3250, 0x325f,
+  0x327e, 0x32fe,
+  0x3300, 0x33ff,
+  0x4dc0, 0x4dff,
+  0xa700, 0xa716,
+  0xe000, 0xf8ff,
+  0xfd3e, 0xfd3f,
+  0xfdfd, 0xfdfd,
+  0xfe10, 0xfe19,
+  0xfe30, 0xfe52,
+  0xfe54, 0xfe66,
+  0xfe68, 0xfe6b,
+  0xfeff, 0xfeff,
+  0xff01, 0xff20,
+  0xff3b, 0xff40,
+  0xff5b, 0xff65,
+  0xff70, 0xff70,
+  0xff9e, 0xff9f,
+  0xffe0, 0xffe6,
+  0xffe8, 0xffee,
+  0xfff9, 0xfffd,
+  0x10100, 0x10102,
+  0x10107, 0x10133,
+  0x10137, 0x1013f,
+  0x1d000, 0x1d0f5,
+  0x1d100, 0x1d126,
+  0x1d12a, 0x1d166,
+  0x1d16a, 0x1d17a,
+  0x1d183, 0x1d184,
+  0x1d18c, 0x1d1a9,
+  0x1d1ae, 0x1d1dd,
+  0x1d300, 0x1d356,
+  0x1d400, 0x1d454,
+  0x1d456, 0x1d49c,
+  0x1d49e, 0x1d49f,
+  0x1d4a2, 0x1d4a2,
+  0x1d4a5, 0x1d4a6,
+  0x1d4a9, 0x1d4ac,
+  0x1d4ae, 0x1d4b9,
+  0x1d4bb, 0x1d4bb,
+  0x1d4bd, 0x1d4c3,
+  0x1d4c5, 0x1d505,
+  0x1d507, 0x1d50a,
+  0x1d50d, 0x1d514,
+  0x1d516, 0x1d51c,
+  0x1d51e, 0x1d539,
+  0x1d53b, 0x1d53e,
+  0x1d540, 0x1d544,
+  0x1d546, 0x1d546,
+  0x1d54a, 0x1d550,
+  0x1d552, 0x1d6a5,
+  0x1d6a8, 0x1d7c9,
+  0x1d7ce, 0x1d7ff,
+  0xe0001, 0xe0001,
+  0xe0020, 0xe007f,
+  0xf0000, 0xffffd,
+  0x100000, 0x10fffd
+}; /* CR_Common */
+
+/* 'Coptic': Script */
+static const OnigCodePoint CR_Coptic[] = {
+  3,
+  0x03e2, 0x03ef,
+  0x2c80, 0x2cea,
+  0x2cf9, 0x2cff
+}; /* CR_Coptic */
+
+/* 'Cypriot': Script */
+static const OnigCodePoint CR_Cypriot[] = {
+  6,
+  0x10800, 0x10805,
+  0x10808, 0x10808,
+  0x1080a, 0x10835,
+  0x10837, 0x10838,
+  0x1083c, 0x1083c,
+  0x1083f, 0x1083f
+}; /* CR_Cypriot */
+
+/* 'Cyrillic': Script */
+static const OnigCodePoint CR_Cyrillic[] = {
+  6,
+  0x0400, 0x0486,
+  0x0488, 0x04ce,
+  0x04d0, 0x04f9,
+  0x0500, 0x050f,
+  0x1d2b, 0x1d2b,
+  0x1d78, 0x1d78
+}; /* CR_Cyrillic */
+
+/* 'Deseret': Script */
+static const OnigCodePoint CR_Deseret[] = {
+  1,
+  0x10400, 0x1044f
+}; /* CR_Deseret */
+
+/* 'Devanagari': Script */
+static const OnigCodePoint CR_Devanagari[] = {
+  6,
+  0x0901, 0x0939,
+  0x093c, 0x094d,
+  0x0950, 0x0954,
+  0x0958, 0x0963,
+  0x0966, 0x096f,
+  0x097d, 0x097d
+}; /* CR_Devanagari */
+
+/* 'Ethiopic': Script */
+static const OnigCodePoint CR_Ethiopic[] = {
+  27,
+  0x1200, 0x1248,
+  0x124a, 0x124d,
+  0x1250, 0x1256,
+  0x1258, 0x1258,
+  0x125a, 0x125d,
+  0x1260, 0x1288,
+  0x128a, 0x128d,
+  0x1290, 0x12b0,
+  0x12b2, 0x12b5,
+  0x12b8, 0x12be,
+  0x12c0, 0x12c0,
+  0x12c2, 0x12c5,
+  0x12c8, 0x12d6,
+  0x12d8, 0x1310,
+  0x1312, 0x1315,
+  0x1318, 0x135a,
+  0x135f, 0x137c,
+  0x1380, 0x1399,
+  0x2d80, 0x2d96,
+  0x2da0, 0x2da6,
+  0x2da8, 0x2dae,
+  0x2db0, 0x2db6,
+  0x2db8, 0x2dbe,
+  0x2dc0, 0x2dc6,
+  0x2dc8, 0x2dce,
+  0x2dd0, 0x2dd6,
+  0x2dd8, 0x2dde
+}; /* CR_Ethiopic */
+
+/* 'Georgian': Script */
+static const OnigCodePoint CR_Georgian[] = {
+  4,
+  0x10a0, 0x10c5,
+  0x10d0, 0x10fa,
+  0x10fc, 0x10fc,
+  0x2d00, 0x2d25
+}; /* CR_Georgian */
+
+/* 'Glagolitic': Script */
+static const OnigCodePoint CR_Glagolitic[] = {
+  2,
+  0x2c00, 0x2c2e,
+  0x2c30, 0x2c5e
+}; /* CR_Glagolitic */
+
+/* 'Gothic': Script */
+static const OnigCodePoint CR_Gothic[] = {
+  1,
+  0x10330, 0x1034a
+}; /* CR_Gothic */
+
+/* 'Greek': Script */
+static const OnigCodePoint CR_Greek[] = {
+  31,
+  0x0374, 0x0375,
+  0x037a, 0x037a,
+  0x0384, 0x0386,
+  0x0388, 0x038a,
+  0x038c, 0x038c,
+  0x038e, 0x03a1,
+  0x03a3, 0x03ce,
+  0x03d0, 0x03e1,
+  0x03f0, 0x03ff,
+  0x1d26, 0x1d2a,
+  0x1d5d, 0x1d61,
+  0x1d66, 0x1d6a,
+  0x1f00, 0x1f15,
+  0x1f18, 0x1f1d,
+  0x1f20, 0x1f45,
+  0x1f48, 0x1f4d,
+  0x1f50, 0x1f57,
+  0x1f59, 0x1f59,
+  0x1f5b, 0x1f5b,
+  0x1f5d, 0x1f5d,
+  0x1f5f, 0x1f7d,
+  0x1f80, 0x1fb4,
+  0x1fb6, 0x1fc4,
+  0x1fc6, 0x1fd3,
+  0x1fd6, 0x1fdb,
+  0x1fdd, 0x1fef,
+  0x1ff2, 0x1ff4,
+  0x1ff6, 0x1ffe,
+  0x2126, 0x2126,
+  0x10140, 0x1018a,
+  0x1d200, 0x1d245
+}; /* CR_Greek */
+
+/* 'Gujarati': Script */
+static const OnigCodePoint CR_Gujarati[] = {
+  14,
+  0x0a81, 0x0a83,
+  0x0a85, 0x0a8d,
+  0x0a8f, 0x0a91,
+  0x0a93, 0x0aa8,
+  0x0aaa, 0x0ab0,
+  0x0ab2, 0x0ab3,
+  0x0ab5, 0x0ab9,
+  0x0abc, 0x0ac5,
+  0x0ac7, 0x0ac9,
+  0x0acb, 0x0acd,
+  0x0ad0, 0x0ad0,
+  0x0ae0, 0x0ae3,
+  0x0ae6, 0x0aef,
+  0x0af1, 0x0af1
+}; /* CR_Gujarati */
+
+/* 'Gurmukhi': Script */
+static const OnigCodePoint CR_Gurmukhi[] = {
+  15,
+  0x0a01, 0x0a03,
+  0x0a05, 0x0a0a,
+  0x0a0f, 0x0a10,
+  0x0a13, 0x0a28,
+  0x0a2a, 0x0a30,
+  0x0a32, 0x0a33,
+  0x0a35, 0x0a36,
+  0x0a38, 0x0a39,
+  0x0a3c, 0x0a3c,
+  0x0a3e, 0x0a42,
+  0x0a47, 0x0a48,
+  0x0a4b, 0x0a4d,
+  0x0a59, 0x0a5c,
+  0x0a5e, 0x0a5e,
+  0x0a66, 0x0a74
+}; /* CR_Gurmukhi */
+
+/* 'Han': Script */
+static const OnigCodePoint CR_Han[] = {
+  14,
+  0x2e80, 0x2e99,
+  0x2e9b, 0x2ef3,
+  0x2f00, 0x2fd5,
+  0x3005, 0x3005,
+  0x3007, 0x3007,
+  0x3021, 0x3029,
+  0x3038, 0x303b,
+  0x3400, 0x4db5,
+  0x4e00, 0x9fbb,
+  0xf900, 0xfa2d,
+  0xfa30, 0xfa6a,
+  0xfa70, 0xfad9,
+  0x20000, 0x2a6d6,
+  0x2f800, 0x2fa1d
+}; /* CR_Han */
+
+/* 'Hangul': Script */
+static const OnigCodePoint CR_Hangul[] = {
+  12,
+  0x1100, 0x1159,
+  0x115f, 0x11a2,
+  0x11a8, 0x11f9,
+  0x3131, 0x318e,
+  0x3200, 0x321e,
+  0x3260, 0x327d,
+  0xac00, 0xd7a3,
+  0xffa0, 0xffbe,
+  0xffc2, 0xffc7,
+  0xffca, 0xffcf,
+  0xffd2, 0xffd7,
+  0xffda, 0xffdc
+}; /* CR_Hangul */
+
+/* 'Hanunoo': Script */
+static const OnigCodePoint CR_Hanunoo[] = {
+  1,
+  0x1720, 0x1734
+}; /* CR_Hanunoo */
+
+/* 'Hebrew': Script */
+static const OnigCodePoint CR_Hebrew[] = {
+  10,
+  0x0591, 0x05b9,
+  0x05bb, 0x05c7,
+  0x05d0, 0x05ea,
+  0x05f0, 0x05f4,
+  0xfb1d, 0xfb36,
+  0xfb38, 0xfb3c,
+  0xfb3e, 0xfb3e,
+  0xfb40, 0xfb41,
+  0xfb43, 0xfb44,
+  0xfb46, 0xfb4f
+}; /* CR_Hebrew */
+
+/* 'Hiragana': Script */
+static const OnigCodePoint CR_Hiragana[] = {
+  2,
+  0x3041, 0x3096,
+  0x309d, 0x309f
+}; /* CR_Hiragana */
+
+/* 'Inherited': Script */
+static const OnigCodePoint CR_Inherited[] = {
+  15,
+  0x0300, 0x036f,
+  0x064b, 0x0655,
+  0x0670, 0x0670,
+  0x1dc0, 0x1dc3,
+  0x200c, 0x200d,
+  0x20d0, 0x20eb,
+  0x302a, 0x302f,
+  0x3099, 0x309a,
+  0xfe00, 0xfe0f,
+  0xfe20, 0xfe23,
+  0x1d167, 0x1d169,
+  0x1d17b, 0x1d182,
+  0x1d185, 0x1d18b,
+  0x1d1aa, 0x1d1ad,
+  0xe0100, 0xe01ef
+}; /* CR_Inherited */
+
+/* 'Kannada': Script */
+static const OnigCodePoint CR_Kannada[] = {
+  13,
+  0x0c82, 0x0c83,
+  0x0c85, 0x0c8c,
+  0x0c8e, 0x0c90,
+  0x0c92, 0x0ca8,
+  0x0caa, 0x0cb3,
+  0x0cb5, 0x0cb9,
+  0x0cbc, 0x0cc4,
+  0x0cc6, 0x0cc8,
+  0x0cca, 0x0ccd,
+  0x0cd5, 0x0cd6,
+  0x0cde, 0x0cde,
+  0x0ce0, 0x0ce1,
+  0x0ce6, 0x0cef
+}; /* CR_Kannada */
+
+/* 'Katakana': Script */
+static const OnigCodePoint CR_Katakana[] = {
+  5,
+  0x30a1, 0x30fa,
+  0x30fd, 0x30ff,
+  0x31f0, 0x31ff,
+  0xff66, 0xff6f,
+  0xff71, 0xff9d
+}; /* CR_Katakana */
+
+/* 'Kharoshthi': Script */
+static const OnigCodePoint CR_Kharoshthi[] = {
+  8,
+  0x10a00, 0x10a03,
+  0x10a05, 0x10a06,
+  0x10a0c, 0x10a13,
+  0x10a15, 0x10a17,
+  0x10a19, 0x10a33,
+  0x10a38, 0x10a3a,
+  0x10a3f, 0x10a47,
+  0x10a50, 0x10a58
+}; /* CR_Kharoshthi */
+
+/* 'Khmer': Script */
+static const OnigCodePoint CR_Khmer[] = {
+  4,
+  0x1780, 0x17dd,
+  0x17e0, 0x17e9,
+  0x17f0, 0x17f9,
+  0x19e0, 0x19ff
+}; /* CR_Khmer */
+
+/* 'Lao': Script */
+static const OnigCodePoint CR_Lao[] = {
+  18,
+  0x0e81, 0x0e82,
+  0x0e84, 0x0e84,
+  0x0e87, 0x0e88,
+  0x0e8a, 0x0e8a,
+  0x0e8d, 0x0e8d,
+  0x0e94, 0x0e97,
+  0x0e99, 0x0e9f,
+  0x0ea1, 0x0ea3,
+  0x0ea5, 0x0ea5,
+  0x0ea7, 0x0ea7,
+  0x0eaa, 0x0eab,
+  0x0ead, 0x0eb9,
+  0x0ebb, 0x0ebd,
+  0x0ec0, 0x0ec4,
+  0x0ec6, 0x0ec6,
+  0x0ec8, 0x0ecd,
+  0x0ed0, 0x0ed9,
+  0x0edc, 0x0edd
+}; /* CR_Lao */
+
+/* 'Latin': Script */
+static const OnigCodePoint CR_Latin[] = {
+  23,
+  0x0041, 0x005a,
+  0x0061, 0x007a,
+  0x00aa, 0x00aa,
+  0x00ba, 0x00ba,
+  0x00c0, 0x00d6,
+  0x00d8, 0x00f6,
+  0x00f8, 0x0241,
+  0x0250, 0x02b8,
+  0x02e0, 0x02e4,
+  0x1d00, 0x1d25,
+  0x1d2c, 0x1d5c,
+  0x1d62, 0x1d65,
+  0x1d6b, 0x1d77,
+  0x1d79, 0x1dbf,
+  0x1e00, 0x1e9b,
+  0x1ea0, 0x1ef9,
+  0x2071, 0x2071,
+  0x207f, 0x207f,
+  0x2090, 0x2094,
+  0x212a, 0x212b,
+  0xfb00, 0xfb06,
+  0xff21, 0xff3a,
+  0xff41, 0xff5a
+}; /* CR_Latin */
+
+/* 'Limbu': Script */
+static const OnigCodePoint CR_Limbu[] = {
+  5,
+  0x1900, 0x191c,
+  0x1920, 0x192b,
+  0x1930, 0x193b,
+  0x1940, 0x1940,
+  0x1944, 0x194f
+}; /* CR_Limbu */
+
+/* 'Linear_B': Script */
+static const OnigCodePoint CR_Linear_B[] = {
+  7,
+  0x10000, 0x1000b,
+  0x1000d, 0x10026,
+  0x10028, 0x1003a,
+  0x1003c, 0x1003d,
+  0x1003f, 0x1004d,
+  0x10050, 0x1005d,
+  0x10080, 0x100fa
+}; /* CR_Linear_B */
+
+/* 'Malayalam': Script */
+static const OnigCodePoint CR_Malayalam[] = {
+  11,
+  0x0d02, 0x0d03,
+  0x0d05, 0x0d0c,
+  0x0d0e, 0x0d10,
+  0x0d12, 0x0d28,
+  0x0d2a, 0x0d39,
+  0x0d3e, 0x0d43,
+  0x0d46, 0x0d48,
+  0x0d4a, 0x0d4d,
+  0x0d57, 0x0d57,
+  0x0d60, 0x0d61,
+  0x0d66, 0x0d6f
+}; /* CR_Malayalam */
+
+/* 'Mongolian': Script */
+static const OnigCodePoint CR_Mongolian[] = {
+  4,
+  0x1800, 0x180e,
+  0x1810, 0x1819,
+  0x1820, 0x1877,
+  0x1880, 0x18a9
+}; /* CR_Mongolian */
+
+/* 'Myanmar': Script */
+static const OnigCodePoint CR_Myanmar[] = {
+  6,
+  0x1000, 0x1021,
+  0x1023, 0x1027,
+  0x1029, 0x102a,
+  0x102c, 0x1032,
+  0x1036, 0x1039,
+  0x1040, 0x1059
+}; /* CR_Myanmar */
+
+/* 'New_Tai_Lue': Script */
+static const OnigCodePoint CR_New_Tai_Lue[] = {
+  4,
+  0x1980, 0x19a9,
+  0x19b0, 0x19c9,
+  0x19d0, 0x19d9,
+  0x19de, 0x19df
+}; /* CR_New_Tai_Lue */
+
+/* 'Ogham': Script */
+static const OnigCodePoint CR_Ogham[] = {
+  1,
+  0x1680, 0x169c
+}; /* CR_Ogham */
+
+/* 'Old_Italic': Script */
+static const OnigCodePoint CR_Old_Italic[] = {
+  2,
+  0x10300, 0x1031e,
+  0x10320, 0x10323
+}; /* CR_Old_Italic */
+
+/* 'Old_Persian': Script */
+static const OnigCodePoint CR_Old_Persian[] = {
+  2,
+  0x103a0, 0x103c3,
+  0x103c8, 0x103d5
+}; /* CR_Old_Persian */
+
+/* 'Oriya': Script */
+static const OnigCodePoint CR_Oriya[] = {
+  14,
+  0x0b01, 0x0b03,
+  0x0b05, 0x0b0c,
+  0x0b0f, 0x0b10,
+  0x0b13, 0x0b28,
+  0x0b2a, 0x0b30,
+  0x0b32, 0x0b33,
+  0x0b35, 0x0b39,
+  0x0b3c, 0x0b43,
+  0x0b47, 0x0b48,
+  0x0b4b, 0x0b4d,
+  0x0b56, 0x0b57,
+  0x0b5c, 0x0b5d,
+  0x0b5f, 0x0b61,
+  0x0b66, 0x0b71
+}; /* CR_Oriya */
+
+/* 'Osmanya': Script */
+static const OnigCodePoint CR_Osmanya[] = {
+  2,
+  0x10480, 0x1049d,
+  0x104a0, 0x104a9
+}; /* CR_Osmanya */
+
+/* 'Runic': Script */
+static const OnigCodePoint CR_Runic[] = {
+  2,
+  0x16a0, 0x16ea,
+  0x16ee, 0x16f0
+}; /* CR_Runic */
+
+/* 'Shavian': Script */
+static const OnigCodePoint CR_Shavian[] = {
+  1,
+  0x10450, 0x1047f
+}; /* CR_Shavian */
+
+/* 'Sinhala': Script */
+static const OnigCodePoint CR_Sinhala[] = {
+  11,
+  0x0d82, 0x0d83,
+  0x0d85, 0x0d96,
+  0x0d9a, 0x0db1,
+  0x0db3, 0x0dbb,
+  0x0dbd, 0x0dbd,
+  0x0dc0, 0x0dc6,
+  0x0dca, 0x0dca,
+  0x0dcf, 0x0dd4,
+  0x0dd6, 0x0dd6,
+  0x0dd8, 0x0ddf,
+  0x0df2, 0x0df4
+}; /* CR_Sinhala */
+
+/* 'Syloti_Nagri': Script */
+static const OnigCodePoint CR_Syloti_Nagri[] = {
+  1,
+  0xa800, 0xa82b
+}; /* CR_Syloti_Nagri */
+
+/* 'Syriac': Script */
+static const OnigCodePoint CR_Syriac[] = {
+  3,
+  0x0700, 0x070d,
+  0x070f, 0x074a,
+  0x074d, 0x074f
+}; /* CR_Syriac */
+
+/* 'Tagalog': Script */
+static const OnigCodePoint CR_Tagalog[] = {
+  2,
+  0x1700, 0x170c,
+  0x170e, 0x1714
+}; /* CR_Tagalog */
+
+/* 'Tagbanwa': Script */
+static const OnigCodePoint CR_Tagbanwa[] = {
+  3,
+  0x1760, 0x176c,
+  0x176e, 0x1770,
+  0x1772, 0x1773
+}; /* CR_Tagbanwa */
+
+/* 'Tai_Le': Script */
+static const OnigCodePoint CR_Tai_Le[] = {
+  2,
+  0x1950, 0x196d,
+  0x1970, 0x1974
+}; /* CR_Tai_Le */
+
+/* 'Tamil': Script */
+static const OnigCodePoint CR_Tamil[] = {
+  15,
+  0x0b82, 0x0b83,
+  0x0b85, 0x0b8a,
+  0x0b8e, 0x0b90,
+  0x0b92, 0x0b95,
+  0x0b99, 0x0b9a,
+  0x0b9c, 0x0b9c,
+  0x0b9e, 0x0b9f,
+  0x0ba3, 0x0ba4,
+  0x0ba8, 0x0baa,
+  0x0bae, 0x0bb9,
+  0x0bbe, 0x0bc2,
+  0x0bc6, 0x0bc8,
+  0x0bca, 0x0bcd,
+  0x0bd7, 0x0bd7,
+  0x0be6, 0x0bfa
+}; /* CR_Tamil */
+
+/* 'Telugu': Script */
+static const OnigCodePoint CR_Telugu[] = {
+  12,
+  0x0c01, 0x0c03,
+  0x0c05, 0x0c0c,
+  0x0c0e, 0x0c10,
+  0x0c12, 0x0c28,
+  0x0c2a, 0x0c33,
+  0x0c35, 0x0c39,
+  0x0c3e, 0x0c44,
+  0x0c46, 0x0c48,
+  0x0c4a, 0x0c4d,
+  0x0c55, 0x0c56,
+  0x0c60, 0x0c61,
+  0x0c66, 0x0c6f
+}; /* CR_Telugu */
+
+/* 'Thaana': Script */
+static const OnigCodePoint CR_Thaana[] = {
+  1,
+  0x0780, 0x07b1
+}; /* CR_Thaana */
+
+/* 'Thai': Script */
+static const OnigCodePoint CR_Thai[] = {
+  2,
+  0x0e01, 0x0e3a,
+  0x0e40, 0x0e5b
+}; /* CR_Thai */
+
+/* 'Tibetan': Script */
+static const OnigCodePoint CR_Tibetan[] = {
+  7,
+  0x0f00, 0x0f47,
+  0x0f49, 0x0f6a,
+  0x0f71, 0x0f8b,
+  0x0f90, 0x0f97,
+  0x0f99, 0x0fbc,
+  0x0fbe, 0x0fcc,
+  0x0fcf, 0x0fd1
+}; /* CR_Tibetan */
+
+/* 'Tifinagh': Script */
+static const OnigCodePoint CR_Tifinagh[] = {
+  2,
+  0x2d30, 0x2d65,
+  0x2d6f, 0x2d6f
+}; /* CR_Tifinagh */
+
+/* 'Ugaritic': Script */
+static const OnigCodePoint CR_Ugaritic[] = {
+  2,
+  0x10380, 0x1039d,
+  0x1039f, 0x1039f
+}; /* CR_Ugaritic */
+
+/* 'Yi': Script */
+static const OnigCodePoint CR_Yi[] = {
+  2,
+  0xa000, 0xa48c,
+  0xa490, 0xa4c6
+}; /* CR_Yi */
+
+
+#endif /* USE_UNICODE_PROPERTIES */
+
+
+typedef struct {  /* Counted list holding at most three code points. */
+  int n;  /* number of leading entries of code[] in use; the CaseFold rows use 1, 2 or 3 */
+  OnigCodePoint code[3];  /* the code points; slots not named by an initializer are zero-filled by C */
+} CodePointList3;
+
+typedef struct {  /* One row of the CaseFold table: a single code point and its folded form. */
+  OnigCodePoint  from;  /* code point being folded, e.g. 0x0041 */
+  CodePointList3 to;  /* folded sequence of 1 to 3 code points, e.g. 0x00df -> {0x0073, 0x0073} */
+} CaseFold_11_Type;
+
+typedef struct {  /* Same field layout as CaseFold_11_Type. NOTE(review): presumably one row of a reverse (unfold) table keyed by a single code point; that table is not visible here -- confirm. */
+  OnigCodePoint  from;  /* single key code point */
+  CodePointList3 to;  /* up to 3 associated code points; to.n presumably says how many are used -- confirm */
+} CaseUnfold_11_Type;
+
+typedef struct {  /* Counted list holding at most two code points. */
+  int n;  /* presumably the number of entries of code[] in use -- confirm against the tables built from this type */
+  OnigCodePoint code[2];  /* storage for the code points */
+} CodePointList2;
+
+typedef struct {  /* Row keyed by a pair of code points. NOTE(review): presumably a two-code-point folded sequence mapped back to the characters that fold to it -- confirm. */
+  OnigCodePoint  from[2];  /* the two key code points */
+  CodePointList2 to;  /* up to 2 associated code points */
+} CaseUnfold_12_Type;
+
+typedef struct {  /* Row keyed by a triple of code points. NOTE(review): presumably a three-code-point folded sequence mapped back to the characters that fold to it -- confirm. */
+  OnigCodePoint  from[3];  /* the three key code points */
+  CodePointList2 to;  /* up to 2 associated code points */
+} CaseUnfold_13_Type;
+
+static const CaseFold_11_Type CaseFold[] = {
+ { 0x0041, {1, {0x0061}}},
+ { 0x0042, {1, {0x0062}}},
+ { 0x0043, {1, {0x0063}}},
+ { 0x0044, {1, {0x0064}}},
+ { 0x0045, {1, {0x0065}}},
+ { 0x0046, {1, {0x0066}}},
+ { 0x0047, {1, {0x0067}}},
+ { 0x0048, {1, {0x0068}}},
+ { 0x004a, {1, {0x006a}}},
+ { 0x004b, {1, {0x006b}}},
+ { 0x004c, {1, {0x006c}}},
+ { 0x004d, {1, {0x006d}}},
+ { 0x004e, {1, {0x006e}}},
+ { 0x004f, {1, {0x006f}}},
+ { 0x0050, {1, {0x0070}}},
+ { 0x0051, {1, {0x0071}}},
+ { 0x0052, {1, {0x0072}}},
+ { 0x0053, {1, {0x0073}}},
+ { 0x0054, {1, {0x0074}}},
+ { 0x0055, {1, {0x0075}}},
+ { 0x0056, {1, {0x0076}}},
+ { 0x0057, {1, {0x0077}}},
+ { 0x0058, {1, {0x0078}}},
+ { 0x0059, {1, {0x0079}}},
+ { 0x005a, {1, {0x007a}}},
+ { 0x00b5, {1, {0x03bc}}},
+ { 0x00c0, {1, {0x00e0}}},
+ { 0x00c1, {1, {0x00e1}}},
+ { 0x00c2, {1, {0x00e2}}},
+ { 0x00c3, {1, {0x00e3}}},
+ { 0x00c4, {1, {0x00e4}}},
+ { 0x00c5, {1, {0x00e5}}},
+ { 0x00c6, {1, {0x00e6}}},
+ { 0x00c7, {1, {0x00e7}}},
+ { 0x00c8, {1, {0x00e8}}},
+ { 0x00c9, {1, {0x00e9}}},
+ { 0x00ca, {1, {0x00ea}}},
+ { 0x00cb, {1, {0x00eb}}},
+ { 0x00cc, {1, {0x00ec}}},
+ { 0x00cd, {1, {0x00ed}}},
+ { 0x00ce, {1, {0x00ee}}},
+ { 0x00cf, {1, {0x00ef}}},
+ { 0x00d0, {1, {0x00f0}}},
+ { 0x00d1, {1, {0x00f1}}},
+ { 0x00d2, {1, {0x00f2}}},
+ { 0x00d3, {1, {0x00f3}}},
+ { 0x00d4, {1, {0x00f4}}},
+ { 0x00d5, {1, {0x00f5}}},
+ { 0x00d6, {1, {0x00f6}}},
+ { 0x00d8, {1, {0x00f8}}},
+ { 0x00d9, {1, {0x00f9}}},
+ { 0x00da, {1, {0x00fa}}},
+ { 0x00db, {1, {0x00fb}}},
+ { 0x00dc, {1, {0x00fc}}},
+ { 0x00dd, {1, {0x00fd}}},
+ { 0x00de, {1, {0x00fe}}},
+ { 0x00df, {2, {0x0073, 0x0073}}},
+ { 0x0100, {1, {0x0101}}},
+ { 0x0102, {1, {0x0103}}},
+ { 0x0104, {1, {0x0105}}},
+ { 0x0106, {1, {0x0107}}},
+ { 0x0108, {1, {0x0109}}},
+ { 0x010a, {1, {0x010b}}},
+ { 0x010c, {1, {0x010d}}},
+ { 0x010e, {1, {0x010f}}},
+ { 0x0110, {1, {0x0111}}},
+ { 0x0112, {1, {0x0113}}},
+ { 0x0114, {1, {0x0115}}},
+ { 0x0116, {1, {0x0117}}},
+ { 0x0118, {1, {0x0119}}},
+ { 0x011a, {1, {0x011b}}},
+ { 0x011c, {1, {0x011d}}},
+ { 0x011e, {1, {0x011f}}},
+ { 0x0120, {1, {0x0121}}},
+ { 0x0122, {1, {0x0123}}},
+ { 0x0124, {1, {0x0125}}},
+ { 0x0126, {1, {0x0127}}},
+ { 0x0128, {1, {0x0129}}},
+ { 0x012a, {1, {0x012b}}},
+ { 0x012c, {1, {0x012d}}},
+ { 0x012e, {1, {0x012f}}},
+ { 0x0132, {1, {0x0133}}},
+ { 0x0134, {1, {0x0135}}},
+ { 0x0136, {1, {0x0137}}},
+ { 0x0139, {1, {0x013a}}},
+ { 0x013b, {1, {0x013c}}},
+ { 0x013d, {1, {0x013e}}},
+ { 0x013f, {1, {0x0140}}},
+ { 0x0141, {1, {0x0142}}},
+ { 0x0143, {1, {0x0144}}},
+ { 0x0145, {1, {0x0146}}},
+ { 0x0147, {1, {0x0148}}},
+ { 0x0149, {2, {0x02bc, 0x006e}}},
+ { 0x014a, {1, {0x014b}}},
+ { 0x014c, {1, {0x014d}}},
+ { 0x014e, {1, {0x014f}}},
+ { 0x0150, {1, {0x0151}}},
+ { 0x0152, {1, {0x0153}}},
+ { 0x0154, {1, {0x0155}}},
+ { 0x0156, {1, {0x0157}}},
+ { 0x0158, {1, {0x0159}}},
+ { 0x015a, {1, {0x015b}}},
+ { 0x015c, {1, {0x015d}}},
+ { 0x015e, {1, {0x015f}}},
+ { 0x0160, {1, {0x0161}}},
+ { 0x0162, {1, {0x0163}}},
+ { 0x0164, {1, {0x0165}}},
+ { 0x0166, {1, {0x0167}}},
+ { 0x0168, {1, {0x0169}}},
+ { 0x016a, {1, {0x016b}}},
+ { 0x016c, {1, {0x016d}}},
+ { 0x016e, {1, {0x016f}}},
+ { 0x0170, {1, {0x0171}}},
+ { 0x0172, {1, {0x0173}}},
+ { 0x0174, {1, {0x0175}}},
+ { 0x0176, {1, {0x0177}}},
+ { 0x0178, {1, {0x00ff}}},
+ { 0x0179, {1, {0x017a}}},
+ { 0x017b, {1, {0x017c}}},
+ { 0x017d, {1, {0x017e}}},
+ { 0x017f, {1, {0x0073}}},
+ { 0x0181, {1, {0x0253}}},
+ { 0x0182, {1, {0x0183}}},
+ { 0x0184, {1, {0x0185}}},
+ { 0x0186, {1, {0x0254}}},
+ { 0x0187, {1, {0x0188}}},
+ { 0x0189, {1, {0x0256}}},
+ { 0x018a, {1, {0x0257}}},
+ { 0x018b, {1, {0x018c}}},
+ { 0x018e, {1, {0x01dd}}},
+ { 0x018f, {1, {0x0259}}},
+ { 0x0190, {1, {0x025b}}},
+ { 0x0191, {1, {0x0192}}},
+ { 0x0193, {1, {0x0260}}},
+ { 0x0194, {1, {0x0263}}},
+ { 0x0196, {1, {0x0269}}},
+ { 0x0197, {1, {0x0268}}},
+ { 0x0198, {1, {0x0199}}},
+ { 0x019c, {1, {0x026f}}},
+ { 0x019d, {1, {0x0272}}},
+ { 0x019f, {1, {0x0275}}},
+ { 0x01a0, {1, {0x01a1}}},
+ { 0x01a2, {1, {0x01a3}}},
+ { 0x01a4, {1, {0x01a5}}},
+ { 0x01a6, {1, {0x0280}}},
+ { 0x01a7, {1, {0x01a8}}},
+ { 0x01a9, {1, {0x0283}}},
+ { 0x01ac, {1, {0x01ad}}},
+ { 0x01ae, {1, {0x0288}}},
+ { 0x01af, {1, {0x01b0}}},
+ { 0x01b1, {1, {0x028a}}},
+ { 0x01b2, {1, {0x028b}}},
+ { 0x01b3, {1, {0x01b4}}},
+ { 0x01b5, {1, {0x01b6}}},
+ { 0x01b7, {1, {0x0292}}},
+ { 0x01b8, {1, {0x01b9}}},
+ { 0x01bc, {1, {0x01bd}}},
+ { 0x01c4, {1, {0x01c6}}},
+ { 0x01c5, {1, {0x01c6}}},
+ { 0x01c7, {1, {0x01c9}}},
+ { 0x01c8, {1, {0x01c9}}},
+ { 0x01ca, {1, {0x01cc}}},
+ { 0x01cb, {1, {0x01cc}}},
+ { 0x01cd, {1, {0x01ce}}},
+ { 0x01cf, {1, {0x01d0}}},
+ { 0x01d1, {1, {0x01d2}}},
+ { 0x01d3, {1, {0x01d4}}},
+ { 0x01d5, {1, {0x01d6}}},
+ { 0x01d7, {1, {0x01d8}}},
+ { 0x01d9, {1, {0x01da}}},
+ { 0x01db, {1, {0x01dc}}},
+ { 0x01de, {1, {0x01df}}},
+ { 0x01e0, {1, {0x01e1}}},
+ { 0x01e2, {1, {0x01e3}}},
+ { 0x01e4, {1, {0x01e5}}},
+ { 0x01e6, {1, {0x01e7}}},
+ { 0x01e8, {1, {0x01e9}}},
+ { 0x01ea, {1, {0x01eb}}},
+ { 0x01ec, {1, {0x01ed}}},
+ { 0x01ee, {1, {0x01ef}}},
+ { 0x01f0, {2, {0x006a, 0x030c}}},
+ { 0x01f1, {1, {0x01f3}}},
+ { 0x01f2, {1, {0x01f3}}},
+ { 0x01f4, {1, {0x01f5}}},
+ { 0x01f6, {1, {0x0195}}},
+ { 0x01f7, {1, {0x01bf}}},
+ { 0x01f8, {1, {0x01f9}}},
+ { 0x01fa, {1, {0x01fb}}},
+ { 0x01fc, {1, {0x01fd}}},
+ { 0x01fe, {1, {0x01ff}}},
+ { 0x0200, {1, {0x0201}}},
+ { 0x0202, {1, {0x0203}}},
+ { 0x0204, {1, {0x0205}}},
+ { 0x0206, {1, {0x0207}}},
+ { 0x0208, {1, {0x0209}}},
+ { 0x020a, {1, {0x020b}}},
+ { 0x020c, {1, {0x020d}}},
+ { 0x020e, {1, {0x020f}}},
+ { 0x0210, {1, {0x0211}}},
+ { 0x0212, {1, {0x0213}}},
+ { 0x0214, {1, {0x0215}}},
+ { 0x0216, {1, {0x0217}}},
+ { 0x0218, {1, {0x0219}}},
+ { 0x021a, {1, {0x021b}}},
+ { 0x021c, {1, {0x021d}}},
+ { 0x021e, {1, {0x021f}}},
+ { 0x0220, {1, {0x019e}}},
+ { 0x0222, {1, {0x0223}}},
+ { 0x0224, {1, {0x0225}}},
+ { 0x0226, {1, {0x0227}}},
+ { 0x0228, {1, {0x0229}}},
+ { 0x022a, {1, {0x022b}}},
+ { 0x022c, {1, {0x022d}}},
+ { 0x022e, {1, {0x022f}}},
+ { 0x0230, {1, {0x0231}}},
+ { 0x0232, {1, {0x0233}}},
+ { 0x023b, {1, {0x023c}}},
+ { 0x023d, {1, {0x019a}}},
+ { 0x0241, {1, {0x0294}}},
+ { 0x0345, {1, {0x03b9}}},
+ { 0x0386, {1, {0x03ac}}},
+ { 0x0388, {1, {0x03ad}}},
+ { 0x0389, {1, {0x03ae}}},
+ { 0x038a, {1, {0x03af}}},
+ { 0x038c, {1, {0x03cc}}},
+ { 0x038e, {1, {0x03cd}}},
+ { 0x038f, {1, {0x03ce}}},
+ { 0x0390, {3, {0x03b9, 0x0308, 0x0301}}},
+ { 0x0391, {1, {0x03b1}}},
+ { 0x0392, {1, {0x03b2}}},
+ { 0x0393, {1, {0x03b3}}},
+ { 0x0394, {1, {0x03b4}}},
+ { 0x0395, {1, {0x03b5}}},
+ { 0x0396, {1, {0x03b6}}},
+ { 0x0397, {1, {0x03b7}}},
+ { 0x0398, {1, {0x03b8}}},
+ { 0x0399, {1, {0x03b9}}},
+ { 0x039a, {1, {0x03ba}}},
+ { 0x039b, {1, {0x03bb}}},
+ { 0x039c, {1, {0x03bc}}},
+ { 0x039d, {1, {0x03bd}}},
+ { 0x039e, {1, {0x03be}}},
+ { 0x039f, {1, {0x03bf}}},
+ { 0x03a0, {1, {0x03c0}}},
+ { 0x03a1, {1, {0x03c1}}},
+ { 0x03a3, {1, {0x03c3}}},
+ { 0x03a4, {1, {0x03c4}}},
+ { 0x03a5, {1, {0x03c5}}},
+ { 0x03a6, {1, {0x03c6}}},
+ { 0x03a7, {1, {0x03c7}}},
+ { 0x03a8, {1, {0x03c8}}},
+ { 0x03a9, {1, {0x03c9}}},
+ { 0x03aa, {1, {0x03ca}}},
+ { 0x03ab, {1, {0x03cb}}},
+ { 0x03b0, {3, {0x03c5, 0x0308, 0x0301}}},
+ { 0x03c2, {1, {0x03c3}}},
+ { 0x03d0, {1, {0x03b2}}},
+ { 0x03d1, {1, {0x03b8}}},
+ { 0x03d5, {1, {0x03c6}}},
+ { 0x03d6, {1, {0x03c0}}},
+ { 0x03d8, {1, {0x03d9}}},
+ { 0x03da, {1, {0x03db}}},
+ { 0x03dc, {1, {0x03dd}}},
+ { 0x03de, {1, {0x03df}}},
+ { 0x03e0, {1, {0x03e1}}},
+ { 0x03e2, {1, {0x03e3}}},
+ { 0x03e4, {1, {0x03e5}}},
+ { 0x03e6, {1, {0x03e7}}},
+ { 0x03e8, {1, {0x03e9}}},
+ { 0x03ea, {1, {0x03eb}}},
+ { 0x03ec, {1, {0x03ed}}},
+ { 0x03ee, {1, {0x03ef}}},
+ { 0x03f0, {1, {0x03ba}}},
+ { 0x03f1, {1, {0x03c1}}},
+ { 0x03f4, {1, {0x03b8}}},
+ { 0x03f5, {1, {0x03b5}}},
+ { 0x03f7, {1, {0x03f8}}},
+ { 0x03f9, {1, {0x03f2}}},
+ { 0x03fa, {1, {0x03fb}}},
+ { 0x0400, {1, {0x0450}}},
+ { 0x0401, {1, {0x0451}}},
+ { 0x0402, {1, {0x0452}}},
+ { 0x0403, {1, {0x0453}}},
+ { 0x0404, {1, {0x0454}}},
+ { 0x0405, {1, {0x0455}}},
+ { 0x0406, {1, {0x0456}}},
+ { 0x0407, {1, {0x0457}}},
+ { 0x0408, {1, {0x0458}}},
+ { 0x0409, {1, {0x0459}}},
+ { 0x040a, {1, {0x045a}}},
+ { 0x040b, {1, {0x045b}}},
+ { 0x040c, {1, {0x045c}}},
+ { 0x040d, {1, {0x045d}}},
+ { 0x040e, {1, {0x045e}}},
+ { 0x040f, {1, {0x045f}}},
+ { 0x0410, {1, {0x0430}}},
+ { 0x0411, {1, {0x0431}}},
+ { 0x0412, {1, {0x0432}}},
+ { 0x0413, {1, {0x0433}}},
+ { 0x0414, {1, {0x0434}}},
+ { 0x0415, {1, {0x0435}}},
+ { 0x0416, {1, {0x0436}}},
+ { 0x0417, {1, {0x0437}}},
+ { 0x0418, {1, {0x0438}}},
+ { 0x0419, {1, {0x0439}}},
+ { 0x041a, {1, {0x043a}}},
+ { 0x041b, {1, {0x043b}}},
+ { 0x041c, {1, {0x043c}}},
+ { 0x041d, {1, {0x043d}}},
+ { 0x041e, {1, {0x043e}}},
+ { 0x041f, {1, {0x043f}}},
+ { 0x0420, {1, {0x0440}}},
+ { 0x0421, {1, {0x0441}}},
+ { 0x0422, {1, {0x0442}}},
+ { 0x0423, {1, {0x0443}}},
+ { 0x0424, {1, {0x0444}}},
+ { 0x0425, {1, {0x0445}}},
+ { 0x0426, {1, {0x0446}}},
+ { 0x0427, {1, {0x0447}}},
+ { 0x0428, {1, {0x0448}}},
+ { 0x0429, {1, {0x0449}}},
+ { 0x042a, {1, {0x044a}}},
+ { 0x042b, {1, {0x044b}}},
+ { 0x042c, {1, {0x044c}}},
+ { 0x042d, {1, {0x044d}}},
+ { 0x042e, {1, {0x044e}}},
+ { 0x042f, {1, {0x044f}}},
+ { 0x0460, {1, {0x0461}}},
+ { 0x0462, {1, {0x0463}}},
+ { 0x0464, {1, {0x0465}}},
+ { 0x0466, {1, {0x0467}}},
+ { 0x0468, {1, {0x0469}}},
+ { 0x046a, {1, {0x046b}}},
+ { 0x046c, {1, {0x046d}}},
+ { 0x046e, {1, {0x046f}}},
+ { 0x0470, {1, {0x0471}}},
+ { 0x0472, {1, {0x0473}}},
+ { 0x0474, {1, {0x0475}}},
+ { 0x0476, {1, {0x0477}}},
+ { 0x0478, {1, {0x0479}}},
+ { 0x047a, {1, {0x047b}}},
+ { 0x047c, {1, {0x047d}}},
+ { 0x047e, {1, {0x047f}}},
+ { 0x0480, {1, {0x0481}}},
+ { 0x048a, {1, {0x048b}}},
+ { 0x048c, {1, {0x048d}}},
+ { 0x048e, {1, {0x048f}}},
+ { 0x0490, {1, {0x0491}}},
+ { 0x0492, {1, {0x0493}}},
+ { 0x0494, {1, {0x0495}}},
+ { 0x0496, {1, {0x0497}}},
+ { 0x0498, {1, {0x0499}}},
+ { 0x049a, {1, {0x049b}}},
+ { 0x049c, {1, {0x049d}}},
+ { 0x049e, {1, {0x049f}}},
+ { 0x04a0, {1, {0x04a1}}},
+ { 0x04a2, {1, {0x04a3}}},
+ { 0x04a4, {1, {0x04a5}}},
+ { 0x04a6, {1, {0x04a7}}},
+ { 0x04a8, {1, {0x04a9}}},
+ { 0x04aa, {1, {0x04ab}}},
+ { 0x04ac, {1, {0x04ad}}},
+ { 0x04ae, {1, {0x04af}}},
+ { 0x04b0, {1, {0x04b1}}},
+ { 0x04b2, {1, {0x04b3}}},
+ { 0x04b4, {1, {0x04b5}}},
+ { 0x04b6, {1, {0x04b7}}},
+ { 0x04b8, {1, {0x04b9}}},
+ { 0x04ba, {1, {0x04bb}}},
+ { 0x04bc, {1, {0x04bd}}},
+ { 0x04be, {1, {0x04bf}}},
+ { 0x04c1, {1, {0x04c2}}},
+ { 0x04c3, {1, {0x04c4}}},
+ { 0x04c5, {1, {0x04c6}}},
+ { 0x04c7, {1, {0x04c8}}},
+ { 0x04c9, {1, {0x04ca}}},
+ { 0x04cb, {1, {0x04cc}}},
+ { 0x04cd, {1, {0x04ce}}},
+ { 0x04d0, {1, {0x04d1}}},
+ { 0x04d2, {1, {0x04d3}}},
+ { 0x04d4, {1, {0x04d5}}},
+ { 0x04d6, {1, {0x04d7}}},
+ { 0x04d8, {1, {0x04d9}}},
+ { 0x04da, {1, {0x04db}}},
+ { 0x04dc, {1, {0x04dd}}},
+ { 0x04de, {1, {0x04df}}},
+ { 0x04e0, {1, {0x04e1}}},
+ { 0x04e2, {1, {0x04e3}}},
+ { 0x04e4, {1, {0x04e5}}},
+ { 0x04e6, {1, {0x04e7}}},
+ { 0x04e8, {1, {0x04e9}}},
+ { 0x04ea, {1, {0x04eb}}},
+ { 0x04ec, {1, {0x04ed}}},
+ { 0x04ee, {1, {0x04ef}}},
+ { 0x04f0, {1, {0x04f1}}},
+ { 0x04f2, {1, {0x04f3}}},
+ { 0x04f4, {1, {0x04f5}}},
+ { 0x04f6, {1, {0x04f7}}},
+ { 0x04f8, {1, {0x04f9}}},
+ { 0x0500, {1, {0x0501}}},
+ { 0x0502, {1, {0x0503}}},
+ { 0x0504, {1, {0x0505}}},
+ { 0x0506, {1, {0x0507}}},
+ { 0x0508, {1, {0x0509}}},
+ { 0x050a, {1, {0x050b}}},
+ { 0x050c, {1, {0x050d}}},
+ { 0x050e, {1, {0x050f}}},
+ { 0x0531, {1, {0x0561}}},
+ { 0x0532, {1, {0x0562}}},
+ { 0x0533, {1, {0x0563}}},
+ { 0x0534, {1, {0x0564}}},
+ { 0x0535, {1, {0x0565}}},
+ { 0x0536, {1, {0x0566}}},
+ { 0x0537, {1, {0x0567}}},
+ { 0x0538, {1, {0x0568}}},
+ { 0x0539, {1, {0x0569}}},
+ { 0x053a, {1, {0x056a}}},
+ { 0x053b, {1, {0x056b}}},
+ { 0x053c, {1, {0x056c}}},
+ { 0x053d, {1, {0x056d}}},
+ { 0x053e, {1, {0x056e}}},
+ { 0x053f, {1, {0x056f}}},
+ { 0x0540, {1, {0x0570}}},
+ { 0x0541, {1, {0x0571}}},
+ { 0x0542, {1, {0x0572}}},
+ { 0x0543, {1, {0x0573}}},
+ { 0x0544, {1, {0x0574}}},
+ { 0x0545, {1, {0x0575}}},
+ { 0x0546, {1, {0x0576}}},
+ { 0x0547, {1, {0x0577}}},
+ { 0x0548, {1, {0x0578}}},
+ { 0x0549, {1, {0x0579}}},
+ { 0x054a, {1, {0x057a}}},
+ { 0x054b, {1, {0x057b}}},
+ { 0x054c, {1, {0x057c}}},
+ { 0x054d, {1, {0x057d}}},
+ { 0x054e, {1, {0x057e}}},
+ { 0x054f, {1, {0x057f}}},
+ { 0x0550, {1, {0x0580}}},
+ { 0x0551, {1, {0x0581}}},
+ { 0x0552, {1, {0x0582}}},
+ { 0x0553, {1, {0x0583}}},
+ { 0x0554, {1, {0x0584}}},
+ { 0x0555, {1, {0x0585}}},
+ { 0x0556, {1, {0x0586}}},
+ { 0x0587, {2, {0x0565, 0x0582}}},
+ { 0x10a0, {1, {0x2d00}}},
+ { 0x10a1, {1, {0x2d01}}},
+ { 0x10a2, {1, {0x2d02}}},
+ { 0x10a3, {1, {0x2d03}}},
+ { 0x10a4, {1, {0x2d04}}},
+ { 0x10a5, {1, {0x2d05}}},
+ { 0x10a6, {1, {0x2d06}}},
+ { 0x10a7, {1, {0x2d07}}},
+ { 0x10a8, {1, {0x2d08}}},
+ { 0x10a9, {1, {0x2d09}}},
+ { 0x10aa, {1, {0x2d0a}}},
+ { 0x10ab, {1, {0x2d0b}}},
+ { 0x10ac, {1, {0x2d0c}}},
+ { 0x10ad, {1, {0x2d0d}}},
+ { 0x10ae, {1, {0x2d0e}}},
+ { 0x10af, {1, {0x2d0f}}},
+ { 0x10b0, {1, {0x2d10}}},
+ { 0x10b1, {1, {0x2d11}}},
+ { 0x10b2, {1, {0x2d12}}},
+ { 0x10b3, {1, {0x2d13}}},
+ { 0x10b4, {1, {0x2d14}}},
+ { 0x10b5, {1, {0x2d15}}},
+ { 0x10b6, {1, {0x2d16}}},
+ { 0x10b7, {1, {0x2d17}}},
+ { 0x10b8, {1, {0x2d18}}},
+ { 0x10b9, {1, {0x2d19}}},
+ { 0x10ba, {1, {0x2d1a}}},
+ { 0x10bb, {1, {0x2d1b}}},
+ { 0x10bc, {1, {0x2d1c}}},
+ { 0x10bd, {1, {0x2d1d}}},
+ { 0x10be, {1, {0x2d1e}}},
+ { 0x10bf, {1, {0x2d1f}}},
+ { 0x10c0, {1, {0x2d20}}},
+ { 0x10c1, {1, {0x2d21}}},
+ { 0x10c2, {1, {0x2d22}}},
+ { 0x10c3, {1, {0x2d23}}},
+ { 0x10c4, {1, {0x2d24}}},
+ { 0x10c5, {1, {0x2d25}}},
+ { 0x1e00, {1, {0x1e01}}},
+ { 0x1e02, {1, {0x1e03}}},
+ { 0x1e04, {1, {0x1e05}}},
+ { 0x1e06, {1, {0x1e07}}},
+ { 0x1e08, {1, {0x1e09}}},
+ { 0x1e0a, {1, {0x1e0b}}},
+ { 0x1e0c, {1, {0x1e0d}}},
+ { 0x1e0e, {1, {0x1e0f}}},
+ { 0x1e10, {1, {0x1e11}}},
+ { 0x1e12, {1, {0x1e13}}},
+ { 0x1e14, {1, {0x1e15}}},
+ { 0x1e16, {1, {0x1e17}}},
+ { 0x1e18, {1, {0x1e19}}},
+ { 0x1e1a, {1, {0x1e1b}}},
+ { 0x1e1c, {1, {0x1e1d}}},
+ { 0x1e1e, {1, {0x1e1f}}},
+ { 0x1e20, {1, {0x1e21}}},
+ { 0x1e22, {1, {0x1e23}}},
+ { 0x1e24, {1, {0x1e25}}},
+ { 0x1e26, {1, {0x1e27}}},
+ { 0x1e28, {1, {0x1e29}}},
+ { 0x1e2a, {1, {0x1e2b}}},
+ { 0x1e2c, {1, {0x1e2d}}},
+ { 0x1e2e, {1, {0x1e2f}}},
+ { 0x1e30, {1, {0x1e31}}},
+ { 0x1e32, {1, {0x1e33}}},
+ { 0x1e34, {1, {0x1e35}}},
+ { 0x1e36, {1, {0x1e37}}},
+ { 0x1e38, {1, {0x1e39}}},
+ { 0x1e3a, {1, {0x1e3b}}},
+ { 0x1e3c, {1, {0x1e3d}}},
+ { 0x1e3e, {1, {0x1e3f}}},
+ { 0x1e40, {1, {0x1e41}}},
+ { 0x1e42, {1, {0x1e43}}},
+ { 0x1e44, {1, {0x1e45}}},
+ { 0x1e46, {1, {0x1e47}}},
+ { 0x1e48, {1, {0x1e49}}},
+ { 0x1e4a, {1, {0x1e4b}}},
+ { 0x1e4c, {1, {0x1e4d}}},
+ { 0x1e4e, {1, {0x1e4f}}},
+ { 0x1e50, {1, {0x1e51}}},
+ { 0x1e52, {1, {0x1e53}}},
+ { 0x1e54, {1, {0x1e55}}},
+ { 0x1e56, {1, {0x1e57}}},
+ { 0x1e58, {1, {0x1e59}}},
+ { 0x1e5a, {1, {0x1e5b}}},
+ { 0x1e5c, {1, {0x1e5d}}},
+ { 0x1e5e, {1, {0x1e5f}}},
+ { 0x1e60, {1, {0x1e61}}},
+ { 0x1e62, {1, {0x1e63}}},
+ { 0x1e64, {1, {0x1e65}}},
+ { 0x1e66, {1, {0x1e67}}},
+ { 0x1e68, {1, {0x1e69}}},
+ { 0x1e6a, {1, {0x1e6b}}},
+ { 0x1e6c, {1, {0x1e6d}}},
+ { 0x1e6e, {1, {0x1e6f}}},
+ { 0x1e70, {1, {0x1e71}}},
+ { 0x1e72, {1, {0x1e73}}},
+ { 0x1e74, {1, {0x1e75}}},
+ { 0x1e76, {1, {0x1e77}}},
+ { 0x1e78, {1, {0x1e79}}},
+ { 0x1e7a, {1, {0x1e7b}}},
+ { 0x1e7c, {1, {0x1e7d}}},
+ { 0x1e7e, {1, {0x1e7f}}},
+ { 0x1e80, {1, {0x1e81}}},
+ { 0x1e82, {1, {0x1e83}}},
+ { 0x1e84, {1, {0x1e85}}},
+ { 0x1e86, {1, {0x1e87}}},
+ { 0x1e88, {1, {0x1e89}}},
+ { 0x1e8a, {1, {0x1e8b}}},
+ { 0x1e8c, {1, {0x1e8d}}},
+ { 0x1e8e, {1, {0x1e8f}}},
+ { 0x1e90, {1, {0x1e91}}},
+ { 0x1e92, {1, {0x1e93}}},
+ { 0x1e94, {1, {0x1e95}}},
+ { 0x1e96, {2, {0x0068, 0x0331}}},
+ { 0x1e97, {2, {0x0074, 0x0308}}},
+ { 0x1e98, {2, {0x0077, 0x030a}}},
+ { 0x1e99, {2, {0x0079, 0x030a}}},
+ { 0x1e9a, {2, {0x0061, 0x02be}}},
+ { 0x1e9b, {1, {0x1e61}}},
+ { 0x1ea0, {1, {0x1ea1}}},
+ { 0x1ea2, {1, {0x1ea3}}},
+ { 0x1ea4, {1, {0x1ea5}}},
+ { 0x1ea6, {1, {0x1ea7}}},
+ { 0x1ea8, {1, {0x1ea9}}},
+ { 0x1eaa, {1, {0x1eab}}},
+ { 0x1eac, {1, {0x1ead}}},
+ { 0x1eae, {1, {0x1eaf}}},
+ { 0x1eb0, {1, {0x1eb1}}},
+ { 0x1eb2, {1, {0x1eb3}}},
+ { 0x1eb4, {1, {0x1eb5}}},
+ { 0x1eb6, {1, {0x1eb7}}},
+ { 0x1eb8, {1, {0x1eb9}}},
+ { 0x1eba, {1, {0x1ebb}}},
+ { 0x1ebc, {1, {0x1ebd}}},
+ { 0x1ebe, {1, {0x1ebf}}},
+ { 0x1ec0, {1, {0x1ec1}}},
+ { 0x1ec2, {1, {0x1ec3}}},
+ { 0x1ec4, {1, {0x1ec5}}},
+ { 0x1ec6, {1, {0x1ec7}}},
+ { 0x1ec8, {1, {0x1ec9}}},
+ { 0x1eca, {1, {0x1ecb}}},
+ { 0x1ecc, {1, {0x1ecd}}},
+ { 0x1ece, {1, {0x1ecf}}},
+ { 0x1ed0, {1, {0x1ed1}}},
+ { 0x1ed2, {1, {0x1ed3}}},
+ { 0x1ed4, {1, {0x1ed5}}},
+ { 0x1ed6, {1, {0x1ed7}}},
+ { 0x1ed8, {1, {0x1ed9}}},
+ { 0x1eda, {1, {0x1edb}}},
+ { 0x1edc, {1, {0x1edd}}},
+ { 0x1ede, {1, {0x1edf}}},
+ { 0x1ee0, {1, {0x1ee1}}},
+ { 0x1ee2, {1, {0x1ee3}}},
+ { 0x1ee4, {1, {0x1ee5}}},
+ { 0x1ee6, {1, {0x1ee7}}},
+ { 0x1ee8, {1, {0x1ee9}}},
+ { 0x1eea, {1, {0x1eeb}}},
+ { 0x1eec, {1, {0x1eed}}},
+ { 0x1eee, {1, {0x1eef}}},
+ { 0x1ef0, {1, {0x1ef1}}},
+ { 0x1ef2, {1, {0x1ef3}}},
+ { 0x1ef4, {1, {0x1ef5}}},
+ { 0x1ef6, {1, {0x1ef7}}},
+ { 0x1ef8, {1, {0x1ef9}}},
+ { 0x1f08, {1, {0x1f00}}},
+ { 0x1f09, {1, {0x1f01}}},
+ { 0x1f0a, {1, {0x1f02}}},
+ { 0x1f0b, {1, {0x1f03}}},
+ { 0x1f0c, {1, {0x1f04}}},
+ { 0x1f0d, {1, {0x1f05}}},
+ { 0x1f0e, {1, {0x1f06}}},
+ { 0x1f0f, {1, {0x1f07}}},
+ { 0x1f18, {1, {0x1f10}}},
+ { 0x1f19, {1, {0x1f11}}},
+ { 0x1f1a, {1, {0x1f12}}},
+ { 0x1f1b, {1, {0x1f13}}},
+ { 0x1f1c, {1, {0x1f14}}},
+ { 0x1f1d, {1, {0x1f15}}},
+ { 0x1f28, {1, {0x1f20}}},
+ { 0x1f29, {1, {0x1f21}}},
+ { 0x1f2a, {1, {0x1f22}}},
+ { 0x1f2b, {1, {0x1f23}}},
+ { 0x1f2c, {1, {0x1f24}}},
+ { 0x1f2d, {1, {0x1f25}}},
+ { 0x1f2e, {1, {0x1f26}}},
+ { 0x1f2f, {1, {0x1f27}}},
+ { 0x1f38, {1, {0x1f30}}},
+ { 0x1f39, {1, {0x1f31}}},
+ { 0x1f3a, {1, {0x1f32}}},
+ { 0x1f3b, {1, {0x1f33}}},
+ { 0x1f3c, {1, {0x1f34}}},
+ { 0x1f3d, {1, {0x1f35}}},
+ { 0x1f3e, {1, {0x1f36}}},
+ { 0x1f3f, {1, {0x1f37}}},
+ { 0x1f48, {1, {0x1f40}}},
+ { 0x1f49, {1, {0x1f41}}},
+ { 0x1f4a, {1, {0x1f42}}},
+ { 0x1f4b, {1, {0x1f43}}},
+ { 0x1f4c, {1, {0x1f44}}},
+ { 0x1f4d, {1, {0x1f45}}},
+ { 0x1f50, {2, {0x03c5, 0x0313}}},
+ { 0x1f52, {3, {0x03c5, 0x0313, 0x0300}}},
+ { 0x1f54, {3, {0x03c5, 0x0313, 0x0301}}},
+ { 0x1f56, {3, {0x03c5, 0x0313, 0x0342}}},
+ { 0x1f59, {1, {0x1f51}}},
+ { 0x1f5b, {1, {0x1f53}}},
+ { 0x1f5d, {1, {0x1f55}}},
+ { 0x1f5f, {1, {0x1f57}}},
+ { 0x1f68, {1, {0x1f60}}},
+ { 0x1f69, {1, {0x1f61}}},
+ { 0x1f6a, {1, {0x1f62}}},
+ { 0x1f6b, {1, {0x1f63}}},
+ { 0x1f6c, {1, {0x1f64}}},
+ { 0x1f6d, {1, {0x1f65}}},
+ { 0x1f6e, {1, {0x1f66}}},
+ { 0x1f6f, {1, {0x1f67}}},
+ { 0x1f80, {2, {0x1f00, 0x03b9}}},
+ { 0x1f81, {2, {0x1f01, 0x03b9}}},
+ { 0x1f82, {2, {0x1f02, 0x03b9}}},
+ { 0x1f83, {2, {0x1f03, 0x03b9}}},
+ { 0x1f84, {2, {0x1f04, 0x03b9}}},
+ { 0x1f85, {2, {0x1f05, 0x03b9}}},
+ { 0x1f86, {2, {0x1f06, 0x03b9}}},
+ { 0x1f87, {2, {0x1f07, 0x03b9}}},
+ { 0x1f88, {2, {0x1f00, 0x03b9}}},
+ { 0x1f89, {2, {0x1f01, 0x03b9}}},
+ { 0x1f8a, {2, {0x1f02, 0x03b9}}},
+ { 0x1f8b, {2, {0x1f03, 0x03b9}}},
+ { 0x1f8c, {2, {0x1f04, 0x03b9}}},
+ { 0x1f8d, {2, {0x1f05, 0x03b9}}},
+ { 0x1f8e, {2, {0x1f06, 0x03b9}}},
+ { 0x1f8f, {2, {0x1f07, 0x03b9}}},
+ { 0x1f90, {2, {0x1f20, 0x03b9}}},
+ { 0x1f91, {2, {0x1f21, 0x03b9}}},
+ { 0x1f92, {2, {0x1f22, 0x03b9}}},
+ { 0x1f93, {2, {0x1f23, 0x03b9}}},
+ { 0x1f94, {2, {0x1f24, 0x03b9}}},
+ { 0x1f95, {2, {0x1f25, 0x03b9}}},
+ { 0x1f96, {2, {0x1f26, 0x03b9}}},
+ { 0x1f97, {2, {0x1f27, 0x03b9}}},
+ { 0x1f98, {2, {0x1f20, 0x03b9}}},
+ { 0x1f99, {2, {0x1f21, 0x03b9}}},
+ { 0x1f9a, {2, {0x1f22, 0x03b9}}},
+ { 0x1f9b, {2, {0x1f23, 0x03b9}}},
+ { 0x1f9c, {2, {0x1f24, 0x03b9}}},
+ { 0x1f9d, {2, {0x1f25, 0x03b9}}},
+ { 0x1f9e, {2, {0x1f26, 0x03b9}}},
+ { 0x1f9f, {2, {0x1f27, 0x03b9}}},
+ { 0x1fa0, {2, {0x1f60, 0x03b9}}},
+ { 0x1fa1, {2, {0x1f61, 0x03b9}}},
+ { 0x1fa2, {2, {0x1f62, 0x03b9}}},
+ { 0x1fa3, {2, {0x1f63, 0x03b9}}},
+ { 0x1fa4, {2, {0x1f64, 0x03b9}}},
+ { 0x1fa5, {2, {0x1f65, 0x03b9}}},
+ { 0x1fa6, {2, {0x1f66, 0x03b9}}},
+ { 0x1fa7, {2, {0x1f67, 0x03b9}}},
+ { 0x1fa8, {2, {0x1f60, 0x03b9}}},
+ { 0x1fa9, {2, {0x1f61, 0x03b9}}},
+ { 0x1faa, {2, {0x1f62, 0x03b9}}},
+ { 0x1fab, {2, {0x1f63, 0x03b9}}},
+ { 0x1fac, {2, {0x1f64, 0x03b9}}},
+ { 0x1fad, {2, {0x1f65, 0x03b9}}},
+ { 0x1fae, {2, {0x1f66, 0x03b9}}},
+ { 0x1faf, {2, {0x1f67, 0x03b9}}},
+ { 0x1fb2, {2, {0x1f70, 0x03b9}}},
+ { 0x1fb3, {2, {0x03b1, 0x03b9}}},
+ { 0x1fb4, {2, {0x03ac, 0x03b9}}},
+ { 0x1fb6, {2, {0x03b1, 0x0342}}},
+ { 0x1fb7, {3, {0x03b1, 0x0342, 0x03b9}}},
+ { 0x1fb8, {1, {0x1fb0}}},
+ { 0x1fb9, {1, {0x1fb1}}},
+ { 0x1fba, {1, {0x1f70}}},
+ { 0x1fbb, {1, {0x1f71}}},
+ { 0x1fbc, {2, {0x03b1, 0x03b9}}},
+ { 0x1fbe, {1, {0x03b9}}},
+ { 0x1fc2, {2, {0x1f74, 0x03b9}}},
+ { 0x1fc3, {2, {0x03b7, 0x03b9}}},
+ { 0x1fc4, {2, {0x03ae, 0x03b9}}},
+ { 0x1fc6, {2, {0x03b7, 0x0342}}},
+ { 0x1fc7, {3, {0x03b7, 0x0342, 0x03b9}}},
+ { 0x1fc8, {1, {0x1f72}}},
+ { 0x1fc9, {1, {0x1f73}}},
+ { 0x1fca, {1, {0x1f74}}},
+ { 0x1fcb, {1, {0x1f75}}},
+ { 0x1fcc, {2, {0x03b7, 0x03b9}}},
+ { 0x1fd2, {3, {0x03b9, 0x0308, 0x0300}}},
+ { 0x1fd3, {3, {0x03b9, 0x0308, 0x0301}}},
+ { 0x1fd6, {2, {0x03b9, 0x0342}}},
+ { 0x1fd7, {3, {0x03b9, 0x0308, 0x0342}}},
+ { 0x1fd8, {1, {0x1fd0}}},
+ { 0x1fd9, {1, {0x1fd1}}},
+ { 0x1fda, {1, {0x1f76}}},
+ { 0x1fdb, {1, {0x1f77}}},
+ { 0x1fe2, {3, {0x03c5, 0x0308, 0x0300}}},
+ { 0x1fe3, {3, {0x03c5, 0x0308, 0x0301}}},
+ { 0x1fe4, {2, {0x03c1, 0x0313}}},
+ { 0x1fe6, {2, {0x03c5, 0x0342}}},
+ { 0x1fe7, {3, {0x03c5, 0x0308, 0x0342}}},
+ { 0x1fe8, {1, {0x1fe0}}},
+ { 0x1fe9, {1, {0x1fe1}}},
+ { 0x1fea, {1, {0x1f7a}}},
+ { 0x1feb, {1, {0x1f7b}}},
+ { 0x1fec, {1, {0x1fe5}}},
+ { 0x1ff2, {2, {0x1f7c, 0x03b9}}},
+ { 0x1ff3, {2, {0x03c9, 0x03b9}}},
+ { 0x1ff4, {2, {0x03ce, 0x03b9}}},
+ { 0x1ff6, {2, {0x03c9, 0x0342}}},
+ { 0x1ff7, {3, {0x03c9, 0x0342, 0x03b9}}},
+ { 0x1ff8, {1, {0x1f78}}},
+ { 0x1ff9, {1, {0x1f79}}},
+ { 0x1ffa, {1, {0x1f7c}}},
+ { 0x1ffb, {1, {0x1f7d}}},
+ { 0x1ffc, {2, {0x03c9, 0x03b9}}},
+ { 0x2126, {1, {0x03c9}}},
+ { 0x212a, {1, {0x006b}}},
+ { 0x212b, {1, {0x00e5}}},
+ { 0x2160, {1, {0x2170}}},
+ { 0x2161, {1, {0x2171}}},
+ { 0x2162, {1, {0x2172}}},
+ { 0x2163, {1, {0x2173}}},
+ { 0x2164, {1, {0x2174}}},
+ { 0x2165, {1, {0x2175}}},
+ { 0x2166, {1, {0x2176}}},
+ { 0x2167, {1, {0x2177}}},
+ { 0x2168, {1, {0x2178}}},
+ { 0x2169, {1, {0x2179}}},
+ { 0x216a, {1, {0x217a}}},
+ { 0x216b, {1, {0x217b}}},
+ { 0x216c, {1, {0x217c}}},
+ { 0x216d, {1, {0x217d}}},
+ { 0x216e, {1, {0x217e}}},
+ { 0x216f, {1, {0x217f}}},
+ { 0x24b6, {1, {0x24d0}}},
+ { 0x24b7, {1, {0x24d1}}},
+ { 0x24b8, {1, {0x24d2}}},
+ { 0x24b9, {1, {0x24d3}}},
+ { 0x24ba, {1, {0x24d4}}},
+ { 0x24bb, {1, {0x24d5}}},
+ { 0x24bc, {1, {0x24d6}}},
+ { 0x24bd, {1, {0x24d7}}},
+ { 0x24be, {1, {0x24d8}}},
+ { 0x24bf, {1, {0x24d9}}},
+ { 0x24c0, {1, {0x24da}}},
+ { 0x24c1, {1, {0x24db}}},
+ { 0x24c2, {1, {0x24dc}}},
+ { 0x24c3, {1, {0x24dd}}},
+ { 0x24c4, {1, {0x24de}}},
+ { 0x24c5, {1, {0x24df}}},
+ { 0x24c6, {1, {0x24e0}}},
+ { 0x24c7, {1, {0x24e1}}},
+ { 0x24c8, {1, {0x24e2}}},
+ { 0x24c9, {1, {0x24e3}}},
+ { 0x24ca, {1, {0x24e4}}},
+ { 0x24cb, {1, {0x24e5}}},
+ { 0x24cc, {1, {0x24e6}}},
+ { 0x24cd, {1, {0x24e7}}},
+ { 0x24ce, {1, {0x24e8}}},
+ { 0x24cf, {1, {0x24e9}}},
+ { 0x2c00, {1, {0x2c30}}},
+ { 0x2c01, {1, {0x2c31}}},
+ { 0x2c02, {1, {0x2c32}}},
+ { 0x2c03, {1, {0x2c33}}},
+ { 0x2c04, {1, {0x2c34}}},
+ { 0x2c05, {1, {0x2c35}}},
+ { 0x2c06, {1, {0x2c36}}},
+ { 0x2c07, {1, {0x2c37}}},
+ { 0x2c08, {1, {0x2c38}}},
+ { 0x2c09, {1, {0x2c39}}},
+ { 0x2c0a, {1, {0x2c3a}}},
+ { 0x2c0b, {1, {0x2c3b}}},
+ { 0x2c0c, {1, {0x2c3c}}},
+ { 0x2c0d, {1, {0x2c3d}}},
+ { 0x2c0e, {1, {0x2c3e}}},
+ { 0x2c0f, {1, {0x2c3f}}},
+ { 0x2c10, {1, {0x2c40}}},
+ { 0x2c11, {1, {0x2c41}}},
+ { 0x2c12, {1, {0x2c42}}},
+ { 0x2c13, {1, {0x2c43}}},
+ { 0x2c14, {1, {0x2c44}}},
+ { 0x2c15, {1, {0x2c45}}},
+ { 0x2c16, {1, {0x2c46}}},
+ { 0x2c17, {1, {0x2c47}}},
+ { 0x2c18, {1, {0x2c48}}},
+ { 0x2c19, {1, {0x2c49}}},
+ { 0x2c1a, {1, {0x2c4a}}},
+ { 0x2c1b, {1, {0x2c4b}}},
+ { 0x2c1c, {1, {0x2c4c}}},
+ { 0x2c1d, {1, {0x2c4d}}},
+ { 0x2c1e, {1, {0x2c4e}}},
+ { 0x2c1f, {1, {0x2c4f}}},
+ { 0x2c20, {1, {0x2c50}}},
+ { 0x2c21, {1, {0x2c51}}},
+ { 0x2c22, {1, {0x2c52}}},
+ { 0x2c23, {1, {0x2c53}}},
+ { 0x2c24, {1, {0x2c54}}},
+ { 0x2c25, {1, {0x2c55}}},
+ { 0x2c26, {1, {0x2c56}}},
+ { 0x2c27, {1, {0x2c57}}},
+ { 0x2c28, {1, {0x2c58}}},
+ { 0x2c29, {1, {0x2c59}}},
+ { 0x2c2a, {1, {0x2c5a}}},
+ { 0x2c2b, {1, {0x2c5b}}},
+ { 0x2c2c, {1, {0x2c5c}}},
+ { 0x2c2d, {1, {0x2c5d}}},
+ { 0x2c2e, {1, {0x2c5e}}},
+ { 0x2c80, {1, {0x2c81}}},
+ { 0x2c82, {1, {0x2c83}}},
+ { 0x2c84, {1, {0x2c85}}},
+ { 0x2c86, {1, {0x2c87}}},
+ { 0x2c88, {1, {0x2c89}}},
+ { 0x2c8a, {1, {0x2c8b}}},
+ { 0x2c8c, {1, {0x2c8d}}},
+ { 0x2c8e, {1, {0x2c8f}}},
+ { 0x2c90, {1, {0x2c91}}},
+ { 0x2c92, {1, {0x2c93}}},
+ { 0x2c94, {1, {0x2c95}}},
+ { 0x2c96, {1, {0x2c97}}},
+ { 0x2c98, {1, {0x2c99}}},
+ { 0x2c9a, {1, {0x2c9b}}},
+ { 0x2c9c, {1, {0x2c9d}}},
+ { 0x2c9e, {1, {0x2c9f}}},
+ { 0x2ca0, {1, {0x2ca1}}},
+ { 0x2ca2, {1, {0x2ca3}}},
+ { 0x2ca4, {1, {0x2ca5}}},
+ { 0x2ca6, {1, {0x2ca7}}},
+ { 0x2ca8, {1, {0x2ca9}}},
+ { 0x2caa, {1, {0x2cab}}},
+ { 0x2cac, {1, {0x2cad}}},
+ { 0x2cae, {1, {0x2caf}}},
+ { 0x2cb0, {1, {0x2cb1}}},
+ { 0x2cb2, {1, {0x2cb3}}},
+ { 0x2cb4, {1, {0x2cb5}}},
+ { 0x2cb6, {1, {0x2cb7}}},
+ { 0x2cb8, {1, {0x2cb9}}},
+ { 0x2cba, {1, {0x2cbb}}},
+ { 0x2cbc, {1, {0x2cbd}}},
+ { 0x2cbe, {1, {0x2cbf}}},
+ { 0x2cc0, {1, {0x2cc1}}},
+ { 0x2cc2, {1, {0x2cc3}}},
+ { 0x2cc4, {1, {0x2cc5}}},
+ { 0x2cc6, {1, {0x2cc7}}},
+ { 0x2cc8, {1, {0x2cc9}}},
+ { 0x2cca, {1, {0x2ccb}}},
+ { 0x2ccc, {1, {0x2ccd}}},
+ { 0x2cce, {1, {0x2ccf}}},
+ { 0x2cd0, {1, {0x2cd1}}},
+ { 0x2cd2, {1, {0x2cd3}}},
+ { 0x2cd4, {1, {0x2cd5}}},
+ { 0x2cd6, {1, {0x2cd7}}},
+ { 0x2cd8, {1, {0x2cd9}}},
+ { 0x2cda, {1, {0x2cdb}}},
+ { 0x2cdc, {1, {0x2cdd}}},
+ { 0x2cde, {1, {0x2cdf}}},
+ { 0x2ce0, {1, {0x2ce1}}},
+ { 0x2ce2, {1, {0x2ce3}}},
+ { 0xfb00, {2, {0x0066, 0x0066}}},
+ { 0xfb01, {2, {0x0066, 0x0069}}},
+ { 0xfb02, {2, {0x0066, 0x006c}}},
+ { 0xfb03, {3, {0x0066, 0x0066, 0x0069}}},
+ { 0xfb04, {3, {0x0066, 0x0066, 0x006c}}},
+ { 0xfb05, {2, {0x0073, 0x0074}}},
+ { 0xfb06, {2, {0x0073, 0x0074}}},
+ { 0xfb13, {2, {0x0574, 0x0576}}},
+ { 0xfb14, {2, {0x0574, 0x0565}}},
+ { 0xfb15, {2, {0x0574, 0x056b}}},
+ { 0xfb16, {2, {0x057e, 0x0576}}},
+ { 0xfb17, {2, {0x0574, 0x056d}}},
+ { 0xff21, {1, {0xff41}}},
+ { 0xff22, {1, {0xff42}}},
+ { 0xff23, {1, {0xff43}}},
+ { 0xff24, {1, {0xff44}}},
+ { 0xff25, {1, {0xff45}}},
+ { 0xff26, {1, {0xff46}}},
+ { 0xff27, {1, {0xff47}}},
+ { 0xff28, {1, {0xff48}}},
+ { 0xff29, {1, {0xff49}}},
+ { 0xff2a, {1, {0xff4a}}},
+ { 0xff2b, {1, {0xff4b}}},
+ { 0xff2c, {1, {0xff4c}}},
+ { 0xff2d, {1, {0xff4d}}},
+ { 0xff2e, {1, {0xff4e}}},
+ { 0xff2f, {1, {0xff4f}}},
+ { 0xff30, {1, {0xff50}}},
+ { 0xff31, {1, {0xff51}}},
+ { 0xff32, {1, {0xff52}}},
+ { 0xff33, {1, {0xff53}}},
+ { 0xff34, {1, {0xff54}}},
+ { 0xff35, {1, {0xff55}}},
+ { 0xff36, {1, {0xff56}}},
+ { 0xff37, {1, {0xff57}}},
+ { 0xff38, {1, {0xff58}}},
+ { 0xff39, {1, {0xff59}}},
+ { 0xff3a, {1, {0xff5a}}},
+ { 0x10400, {1, {0x10428}}},
+ { 0x10401, {1, {0x10429}}},
+ { 0x10402, {1, {0x1042a}}},
+ { 0x10403, {1, {0x1042b}}},
+ { 0x10404, {1, {0x1042c}}},
+ { 0x10405, {1, {0x1042d}}},
+ { 0x10406, {1, {0x1042e}}},
+ { 0x10407, {1, {0x1042f}}},
+ { 0x10408, {1, {0x10430}}},
+ { 0x10409, {1, {0x10431}}},
+ { 0x1040a, {1, {0x10432}}},
+ { 0x1040b, {1, {0x10433}}},
+ { 0x1040c, {1, {0x10434}}},
+ { 0x1040d, {1, {0x10435}}},
+ { 0x1040e, {1, {0x10436}}},
+ { 0x1040f, {1, {0x10437}}},
+ { 0x10410, {1, {0x10438}}},
+ { 0x10411, {1, {0x10439}}},
+ { 0x10412, {1, {0x1043a}}},
+ { 0x10413, {1, {0x1043b}}},
+ { 0x10414, {1, {0x1043c}}},
+ { 0x10415, {1, {0x1043d}}},
+ { 0x10416, {1, {0x1043e}}},
+ { 0x10417, {1, {0x1043f}}},
+ { 0x10418, {1, {0x10440}}},
+ { 0x10419, {1, {0x10441}}},
+ { 0x1041a, {1, {0x10442}}},
+ { 0x1041b, {1, {0x10443}}},
+ { 0x1041c, {1, {0x10444}}},
+ { 0x1041d, {1, {0x10445}}},
+ { 0x1041e, {1, {0x10446}}},
+ { 0x1041f, {1, {0x10447}}},
+ { 0x10420, {1, {0x10448}}},
+ { 0x10421, {1, {0x10449}}},
+ { 0x10422, {1, {0x1044a}}},
+ { 0x10423, {1, {0x1044b}}},
+ { 0x10424, {1, {0x1044c}}},
+ { 0x10425, {1, {0x1044d}}},
+ { 0x10426, {1, {0x1044e}}},
+ { 0x10427, {1, {0x1044f}}}
+};
+
+static const CaseFold_11_Type CaseFold_Locale[] = { /* entry shape: { source code point, { n, { n target code points } } }; NOTE(review): presumably held apart from the main fold table because folding of I / I-with-dot is locale-sensitive (Turkic) -- confirm against the code that consumes this table */
+ { 0x0049, {1, {0x0069}}}, /* U+0049 LATIN CAPITAL LETTER I -> U+0069 LATIN SMALL LETTER I */
+ { 0x0130, {2, {0x0069, 0x0307}}} /* U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE -> U+0069 followed by U+0307 COMBINING DOT ABOVE */
+};
+
+static const CaseUnfold_11_Type CaseUnfold_11[] = {
+ { 0x0061, {1, {0x0041 }}},
+ { 0x0062, {1, {0x0042 }}},
+ { 0x0063, {1, {0x0043 }}},
+ { 0x0064, {1, {0x0044 }}},
+ { 0x0065, {1, {0x0045 }}},
+ { 0x0066, {1, {0x0046 }}},
+ { 0x0067, {1, {0x0047 }}},
+ { 0x0068, {1, {0x0048 }}},
+ { 0x006a, {1, {0x004a }}},
+ { 0x006b, {2, {0x212a, 0x004b }}},
+ { 0x006c, {1, {0x004c }}},
+ { 0x006d, {1, {0x004d }}},
+ { 0x006e, {1, {0x004e }}},
+ { 0x006f, {1, {0x004f }}},
+ { 0x0070, {1, {0x0050 }}},
+ { 0x0071, {1, {0x0051 }}},
+ { 0x0072, {1, {0x0052 }}},
+ { 0x0073, {2, {0x0053, 0x017f }}},
+ { 0x0074, {1, {0x0054 }}},
+ { 0x0075, {1, {0x0055 }}},
+ { 0x0076, {1, {0x0056 }}},
+ { 0x0077, {1, {0x0057 }}},
+ { 0x0078, {1, {0x0058 }}},
+ { 0x0079, {1, {0x0059 }}},
+ { 0x007a, {1, {0x005a }}},
+ { 0x00e0, {1, {0x00c0 }}},
+ { 0x00e1, {1, {0x00c1 }}},
+ { 0x00e2, {1, {0x00c2 }}},
+ { 0x00e3, {1, {0x00c3 }}},
+ { 0x00e4, {1, {0x00c4 }}},
+ { 0x00e5, {2, {0x212b, 0x00c5 }}},
+ { 0x00e6, {1, {0x00c6 }}},
+ { 0x00e7, {1, {0x00c7 }}},
+ { 0x00e8, {1, {0x00c8 }}},
+ { 0x00e9, {1, {0x00c9 }}},
+ { 0x00ea, {1, {0x00ca }}},
+ { 0x00eb, {1, {0x00cb }}},
+ { 0x00ec, {1, {0x00cc }}},
+ { 0x00ed, {1, {0x00cd }}},
+ { 0x00ee, {1, {0x00ce }}},
+ { 0x00ef, {1, {0x00cf }}},
+ { 0x00f0, {1, {0x00d0 }}},
+ { 0x00f1, {1, {0x00d1 }}},
+ { 0x00f2, {1, {0x00d2 }}},
+ { 0x00f3, {1, {0x00d3 }}},
+ { 0x00f4, {1, {0x00d4 }}},
+ { 0x00f5, {1, {0x00d5 }}},
+ { 0x00f6, {1, {0x00d6 }}},
+ { 0x00f8, {1, {0x00d8 }}},
+ { 0x00f9, {1, {0x00d9 }}},
+ { 0x00fa, {1, {0x00da }}},
+ { 0x00fb, {1, {0x00db }}},
+ { 0x00fc, {1, {0x00dc }}},
+ { 0x00fd, {1, {0x00dd }}},
+ { 0x00fe, {1, {0x00de }}},
+ { 0x00ff, {1, {0x0178 }}},
+ { 0x0101, {1, {0x0100 }}},
+ { 0x0103, {1, {0x0102 }}},
+ { 0x0105, {1, {0x0104 }}},
+ { 0x0107, {1, {0x0106 }}},
+ { 0x0109, {1, {0x0108 }}},
+ { 0x010b, {1, {0x010a }}},
+ { 0x010d, {1, {0x010c }}},
+ { 0x010f, {1, {0x010e }}},
+ { 0x0111, {1, {0x0110 }}},
+ { 0x0113, {1, {0x0112 }}},
+ { 0x0115, {1, {0x0114 }}},
+ { 0x0117, {1, {0x0116 }}},
+ { 0x0119, {1, {0x0118 }}},
+ { 0x011b, {1, {0x011a }}},
+ { 0x011d, {1, {0x011c }}},
+ { 0x011f, {1, {0x011e }}},
+ { 0x0121, {1, {0x0120 }}},
+ { 0x0123, {1, {0x0122 }}},
+ { 0x0125, {1, {0x0124 }}},
+ { 0x0127, {1, {0x0126 }}},
+ { 0x0129, {1, {0x0128 }}},
+ { 0x012b, {1, {0x012a }}},
+ { 0x012d, {1, {0x012c }}},
+ { 0x012f, {1, {0x012e }}},
+ { 0x0133, {1, {0x0132 }}},
+ { 0x0135, {1, {0x0134 }}},
+ { 0x0137, {1, {0x0136 }}},
+ { 0x013a, {1, {0x0139 }}},
+ { 0x013c, {1, {0x013b }}},
+ { 0x013e, {1, {0x013d }}},
+ { 0x0140, {1, {0x013f }}},
+ { 0x0142, {1, {0x0141 }}},
+ { 0x0144, {1, {0x0143 }}},
+ { 0x0146, {1, {0x0145 }}},
+ { 0x0148, {1, {0x0147 }}},
+ { 0x014b, {1, {0x014a }}},
+ { 0x014d, {1, {0x014c }}},
+ { 0x014f, {1, {0x014e }}},
+ { 0x0151, {1, {0x0150 }}},
+ { 0x0153, {1, {0x0152 }}},
+ { 0x0155, {1, {0x0154 }}},
+ { 0x0157, {1, {0x0156 }}},
+ { 0x0159, {1, {0x0158 }}},
+ { 0x015b, {1, {0x015a }}},
+ { 0x015d, {1, {0x015c }}},
+ { 0x015f, {1, {0x015e }}},
+ { 0x0161, {1, {0x0160 }}},
+ { 0x0163, {1, {0x0162 }}},
+ { 0x0165, {1, {0x0164 }}},
+ { 0x0167, {1, {0x0166 }}},
+ { 0x0169, {1, {0x0168 }}},
+ { 0x016b, {1, {0x016a }}},
+ { 0x016d, {1, {0x016c }}},
+ { 0x016f, {1, {0x016e }}},
+ { 0x0171, {1, {0x0170 }}},
+ { 0x0173, {1, {0x0172 }}},
+ { 0x0175, {1, {0x0174 }}},
+ { 0x0177, {1, {0x0176 }}},
+ { 0x017a, {1, {0x0179 }}},
+ { 0x017c, {1, {0x017b }}},
+ { 0x017e, {1, {0x017d }}},
+ { 0x0183, {1, {0x0182 }}},
+ { 0x0185, {1, {0x0184 }}},
+ { 0x0188, {1, {0x0187 }}},
+ { 0x018c, {1, {0x018b }}},
+ { 0x0192, {1, {0x0191 }}},
+ { 0x0195, {1, {0x01f6 }}},
+ { 0x0199, {1, {0x0198 }}},
+ { 0x019a, {1, {0x023d }}},
+ { 0x019e, {1, {0x0220 }}},
+ { 0x01a1, {1, {0x01a0 }}},
+ { 0x01a3, {1, {0x01a2 }}},
+ { 0x01a5, {1, {0x01a4 }}},
+ { 0x01a8, {1, {0x01a7 }}},
+ { 0x01ad, {1, {0x01ac }}},
+ { 0x01b0, {1, {0x01af }}},
+ { 0x01b4, {1, {0x01b3 }}},
+ { 0x01b6, {1, {0x01b5 }}},
+ { 0x01b9, {1, {0x01b8 }}},
+ { 0x01bd, {1, {0x01bc }}},
+ { 0x01bf, {1, {0x01f7 }}},
+ { 0x01c6, {2, {0x01c4, 0x01c5 }}},
+ { 0x01c9, {2, {0x01c7, 0x01c8 }}},
+ { 0x01cc, {2, {0x01ca, 0x01cb }}},
+ { 0x01ce, {1, {0x01cd }}},
+ { 0x01d0, {1, {0x01cf }}},
+ { 0x01d2, {1, {0x01d1 }}},
+ { 0x01d4, {1, {0x01d3 }}},
+ { 0x01d6, {1, {0x01d5 }}},
+ { 0x01d8, {1, {0x01d7 }}},
+ { 0x01da, {1, {0x01d9 }}},
+ { 0x01dc, {1, {0x01db }}},
+ { 0x01dd, {1, {0x018e }}},
+ { 0x01df, {1, {0x01de }}},
+ { 0x01e1, {1, {0x01e0 }}},
+ { 0x01e3, {1, {0x01e2 }}},
+ { 0x01e5, {1, {0x01e4 }}},
+ { 0x01e7, {1, {0x01e6 }}},
+ { 0x01e9, {1, {0x01e8 }}},
+ { 0x01eb, {1, {0x01ea }}},
+ { 0x01ed, {1, {0x01ec }}},
+ { 0x01ef, {1, {0x01ee }}},
+ { 0x01f3, {2, {0x01f1, 0x01f2 }}},
+ { 0x01f5, {1, {0x01f4 }}},
+ { 0x01f9, {1, {0x01f8 }}},
+ { 0x01fb, {1, {0x01fa }}},
+ { 0x01fd, {1, {0x01fc }}},
+ { 0x01ff, {1, {0x01fe }}},
+ { 0x0201, {1, {0x0200 }}},
+ { 0x0203, {1, {0x0202 }}},
+ { 0x0205, {1, {0x0204 }}},
+ { 0x0207, {1, {0x0206 }}},
+ { 0x0209, {1, {0x0208 }}},
+ { 0x020b, {1, {0x020a }}},
+ { 0x020d, {1, {0x020c }}},
+ { 0x020f, {1, {0x020e }}},
+ { 0x0211, {1, {0x0210 }}},
+ { 0x0213, {1, {0x0212 }}},
+ { 0x0215, {1, {0x0214 }}},
+ { 0x0217, {1, {0x0216 }}},
+ { 0x0219, {1, {0x0218 }}},
+ { 0x021b, {1, {0x021a }}},
+ { 0x021d, {1, {0x021c }}},
+ { 0x021f, {1, {0x021e }}},
+ { 0x0223, {1, {0x0222 }}},
+ { 0x0225, {1, {0x0224 }}},
+ { 0x0227, {1, {0x0226 }}},
+ { 0x0229, {1, {0x0228 }}},
+ { 0x022b, {1, {0x022a }}},
+ { 0x022d, {1, {0x022c }}},
+ { 0x022f, {1, {0x022e }}},
+ { 0x0231, {1, {0x0230 }}},
+ { 0x0233, {1, {0x0232 }}},
+ { 0x023c, {1, {0x023b }}},
+ { 0x0253, {1, {0x0181 }}},
+ { 0x0254, {1, {0x0186 }}},
+ { 0x0256, {1, {0x0189 }}},
+ { 0x0257, {1, {0x018a }}},
+ { 0x0259, {1, {0x018f }}},
+ { 0x025b, {1, {0x0190 }}},
+ { 0x0260, {1, {0x0193 }}},
+ { 0x0263, {1, {0x0194 }}},
+ { 0x0268, {1, {0x0197 }}},
+ { 0x0269, {1, {0x0196 }}},
+ { 0x026f, {1, {0x019c }}},
+ { 0x0272, {1, {0x019d }}},
+ { 0x0275, {1, {0x019f }}},
+ { 0x0280, {1, {0x01a6 }}},
+ { 0x0283, {1, {0x01a9 }}},
+ { 0x0288, {1, {0x01ae }}},
+ { 0x028a, {1, {0x01b1 }}},
+ { 0x028b, {1, {0x01b2 }}},
+ { 0x0292, {1, {0x01b7 }}},
+ { 0x0294, {1, {0x0241 }}},
+ { 0x03ac, {1, {0x0386 }}},
+ { 0x03ad, {1, {0x0388 }}},
+ { 0x03ae, {1, {0x0389 }}},
+ { 0x03af, {1, {0x038a }}},
+ { 0x03b1, {1, {0x0391 }}},
+ { 0x03b2, {2, {0x0392, 0x03d0 }}},
+ { 0x03b3, {1, {0x0393 }}},
+ { 0x03b4, {1, {0x0394 }}},
+ { 0x03b5, {2, {0x03f5, 0x0395 }}},
+ { 0x03b6, {1, {0x0396 }}},
+ { 0x03b7, {1, {0x0397 }}},
+ { 0x03b8, {3, {0x03f4, 0x0398, 0x03d1 }}},
+ { 0x03b9, {3, {0x1fbe, 0x0399, 0x0345 }}},
+ { 0x03ba, {2, {0x03f0, 0x039a }}},
+ { 0x03bb, {1, {0x039b }}},
+ { 0x03bc, {2, {0x00b5, 0x039c }}},
+ { 0x03bd, {1, {0x039d }}},
+ { 0x03be, {1, {0x039e }}},
+ { 0x03bf, {1, {0x039f }}},
+ { 0x03c0, {2, {0x03a0, 0x03d6 }}},
+ { 0x03c1, {2, {0x03f1, 0x03a1 }}},
+ { 0x03c3, {2, {0x03a3, 0x03c2 }}},
+ { 0x03c4, {1, {0x03a4 }}},
+ { 0x03c5, {1, {0x03a5 }}},
+ { 0x03c6, {2, {0x03a6, 0x03d5 }}},
+ { 0x03c7, {1, {0x03a7 }}},
+ { 0x03c8, {1, {0x03a8 }}},
+ { 0x03c9, {2, {0x03a9, 0x2126 }}},
+ { 0x03ca, {1, {0x03aa }}},
+ { 0x03cb, {1, {0x03ab }}},
+ { 0x03cc, {1, {0x038c }}},
+ { 0x03cd, {1, {0x038e }}},
+ { 0x03ce, {1, {0x038f }}},
+ { 0x03d9, {1, {0x03d8 }}},
+ { 0x03db, {1, {0x03da }}},
+ { 0x03dd, {1, {0x03dc }}},
+ { 0x03df, {1, {0x03de }}},
+ { 0x03e1, {1, {0x03e0 }}},
+ { 0x03e3, {1, {0x03e2 }}},
+ { 0x03e5, {1, {0x03e4 }}},
+ { 0x03e7, {1, {0x03e6 }}},
+ { 0x03e9, {1, {0x03e8 }}},
+ { 0x03eb, {1, {0x03ea }}},
+ { 0x03ed, {1, {0x03ec }}},
+ { 0x03ef, {1, {0x03ee }}},
+ { 0x03f2, {1, {0x03f9 }}},
+ { 0x03f8, {1, {0x03f7 }}},
+ { 0x03fb, {1, {0x03fa }}},
+ { 0x0430, {1, {0x0410 }}},
+ { 0x0431, {1, {0x0411 }}},
+ { 0x0432, {1, {0x0412 }}},
+ { 0x0433, {1, {0x0413 }}},
+ { 0x0434, {1, {0x0414 }}},
+ { 0x0435, {1, {0x0415 }}},
+ { 0x0436, {1, {0x0416 }}},
+ { 0x0437, {1, {0x0417 }}},
+ { 0x0438, {1, {0x0418 }}},
+ { 0x0439, {1, {0x0419 }}},
+ { 0x043a, {1, {0x041a }}},
+ { 0x043b, {1, {0x041b }}},
+ { 0x043c, {1, {0x041c }}},
+ { 0x043d, {1, {0x041d }}},
+ { 0x043e, {1, {0x041e }}},
+ { 0x043f, {1, {0x041f }}},
+ { 0x0440, {1, {0x0420 }}},
+ { 0x0441, {1, {0x0421 }}},
+ { 0x0442, {1, {0x0422 }}},
+ { 0x0443, {1, {0x0423 }}},
+ { 0x0444, {1, {0x0424 }}},
+ { 0x0445, {1, {0x0425 }}},
+ { 0x0446, {1, {0x0426 }}},
+ { 0x0447, {1, {0x0427 }}},
+ { 0x0448, {1, {0x0428 }}},
+ { 0x0449, {1, {0x0429 }}},
+ { 0x044a, {1, {0x042a }}},
+ { 0x044b, {1, {0x042b }}},
+ { 0x044c, {1, {0x042c }}},
+ { 0x044d, {1, {0x042d }}},
+ { 0x044e, {1, {0x042e }}},
+ { 0x044f, {1, {0x042f }}},
+ { 0x0450, {1, {0x0400 }}},
+ { 0x0451, {1, {0x0401 }}},
+ { 0x0452, {1, {0x0402 }}},
+ { 0x0453, {1, {0x0403 }}},
+ { 0x0454, {1, {0x0404 }}},
+ { 0x0455, {1, {0x0405 }}},
+ { 0x0456, {1, {0x0406 }}},
+ { 0x0457, {1, {0x0407 }}},
+ { 0x0458, {1, {0x0408 }}},
+ { 0x0459, {1, {0x0409 }}},
+ { 0x045a, {1, {0x040a }}},
+ { 0x045b, {1, {0x040b }}},
+ { 0x045c, {1, {0x040c }}},
+ { 0x045d, {1, {0x040d }}},
+ { 0x045e, {1, {0x040e }}},
+ { 0x045f, {1, {0x040f }}},
+ { 0x0461, {1, {0x0460 }}},
+ { 0x0463, {1, {0x0462 }}},
+ { 0x0465, {1, {0x0464 }}},
+ { 0x0467, {1, {0x0466 }}},
+ { 0x0469, {1, {0x0468 }}},
+ { 0x046b, {1, {0x046a }}},
+ { 0x046d, {1, {0x046c }}},
+ { 0x046f, {1, {0x046e }}},
+ { 0x0471, {1, {0x0470 }}},
+ { 0x0473, {1, {0x0472 }}},
+ { 0x0475, {1, {0x0474 }}},
+ { 0x0477, {1, {0x0476 }}},
+ { 0x0479, {1, {0x0478 }}},
+ { 0x047b, {1, {0x047a }}},
+ { 0x047d, {1, {0x047c }}},
+ { 0x047f, {1, {0x047e }}},
+ { 0x0481, {1, {0x0480 }}},
+ { 0x048b, {1, {0x048a }}},
+ { 0x048d, {1, {0x048c }}},
+ { 0x048f, {1, {0x048e }}},
+ { 0x0491, {1, {0x0490 }}},
+ { 0x0493, {1, {0x0492 }}},
+ { 0x0495, {1, {0x0494 }}},
+ { 0x0497, {1, {0x0496 }}},
+ { 0x0499, {1, {0x0498 }}},
+ { 0x049b, {1, {0x049a }}},
+ { 0x049d, {1, {0x049c }}},
+ { 0x049f, {1, {0x049e }}},
+ { 0x04a1, {1, {0x04a0 }}},
+ { 0x04a3, {1, {0x04a2 }}},
+ { 0x04a5, {1, {0x04a4 }}},
+ { 0x04a7, {1, {0x04a6 }}},
+ { 0x04a9, {1, {0x04a8 }}},
+ { 0x04ab, {1, {0x04aa }}},
+ { 0x04ad, {1, {0x04ac }}},
+ { 0x04af, {1, {0x04ae }}},
+ { 0x04b1, {1, {0x04b0 }}},
+ { 0x04b3, {1, {0x04b2 }}},
+ { 0x04b5, {1, {0x04b4 }}},
+ { 0x04b7, {1, {0x04b6 }}},
+ { 0x04b9, {1, {0x04b8 }}},
+ { 0x04bb, {1, {0x04ba }}},
+ { 0x04bd, {1, {0x04bc }}},
+ { 0x04bf, {1, {0x04be }}},
+ { 0x04c2, {1, {0x04c1 }}},
+ { 0x04c4, {1, {0x04c3 }}},
+ { 0x04c6, {1, {0x04c5 }}},
+ { 0x04c8, {1, {0x04c7 }}},
+ { 0x04ca, {1, {0x04c9 }}},
+ { 0x04cc, {1, {0x04cb }}},
+ { 0x04ce, {1, {0x04cd }}},
+ { 0x04d1, {1, {0x04d0 }}},
+ { 0x04d3, {1, {0x04d2 }}},
+ { 0x04d5, {1, {0x04d4 }}},
+ { 0x04d7, {1, {0x04d6 }}},
+ { 0x04d9, {1, {0x04d8 }}},
+ { 0x04db, {1, {0x04da }}},
+ { 0x04dd, {1, {0x04dc }}},
+ { 0x04df, {1, {0x04de }}},
+ { 0x04e1, {1, {0x04e0 }}},
+ { 0x04e3, {1, {0x04e2 }}},
+ { 0x04e5, {1, {0x04e4 }}},
+ { 0x04e7, {1, {0x04e6 }}},
+ { 0x04e9, {1, {0x04e8 }}},
+ { 0x04eb, {1, {0x04ea }}},
+ { 0x04ed, {1, {0x04ec }}},
+ { 0x04ef, {1, {0x04ee }}},
+ { 0x04f1, {1, {0x04f0 }}},
+ { 0x04f3, {1, {0x04f2 }}},
+ { 0x04f5, {1, {0x04f4 }}},
+ { 0x04f7, {1, {0x04f6 }}},
+ { 0x04f9, {1, {0x04f8 }}},
+ { 0x0501, {1, {0x0500 }}},
+ { 0x0503, {1, {0x0502 }}},
+ { 0x0505, {1, {0x0504 }}},
+ { 0x0507, {1, {0x0506 }}},
+ { 0x0509, {1, {0x0508 }}},
+ { 0x050b, {1, {0x050a }}},
+ { 0x050d, {1, {0x050c }}},
+ { 0x050f, {1, {0x050e }}},
+ { 0x0561, {1, {0x0531 }}},
+ { 0x0562, {1, {0x0532 }}},
+ { 0x0563, {1, {0x0533 }}},
+ { 0x0564, {1, {0x0534 }}},
+ { 0x0565, {1, {0x0535 }}},
+ { 0x0566, {1, {0x0536 }}},
+ { 0x0567, {1, {0x0537 }}},
+ { 0x0568, {1, {0x0538 }}},
+ { 0x0569, {1, {0x0539 }}},
+ { 0x056a, {1, {0x053a }}},
+ { 0x056b, {1, {0x053b }}},
+ { 0x056c, {1, {0x053c }}},
+ { 0x056d, {1, {0x053d }}},
+ { 0x056e, {1, {0x053e }}},
+ { 0x056f, {1, {0x053f }}},
+ { 0x0570, {1, {0x0540 }}},
+ { 0x0571, {1, {0x0541 }}},
+ { 0x0572, {1, {0x0542 }}},
+ { 0x0573, {1, {0x0543 }}},
+ { 0x0574, {1, {0x0544 }}},
+ { 0x0575, {1, {0x0545 }}},
+ { 0x0576, {1, {0x0546 }}},
+ { 0x0577, {1, {0x0547 }}},
+ { 0x0578, {1, {0x0548 }}},
+ { 0x0579, {1, {0x0549 }}},
+ { 0x057a, {1, {0x054a }}},
+ { 0x057b, {1, {0x054b }}},
+ { 0x057c, {1, {0x054c }}},
+ { 0x057d, {1, {0x054d }}},
+ { 0x057e, {1, {0x054e }}},
+ { 0x057f, {1, {0x054f }}},
+ { 0x0580, {1, {0x0550 }}},
+ { 0x0581, {1, {0x0551 }}},
+ { 0x0582, {1, {0x0552 }}},
+ { 0x0583, {1, {0x0553 }}},
+ { 0x0584, {1, {0x0554 }}},
+ { 0x0585, {1, {0x0555 }}},
+ { 0x0586, {1, {0x0556 }}},
+ { 0x1e01, {1, {0x1e00 }}},
+ { 0x1e03, {1, {0x1e02 }}},
+ { 0x1e05, {1, {0x1e04 }}},
+ { 0x1e07, {1, {0x1e06 }}},
+ { 0x1e09, {1, {0x1e08 }}},
+ { 0x1e0b, {1, {0x1e0a }}},
+ { 0x1e0d, {1, {0x1e0c }}},
+ { 0x1e0f, {1, {0x1e0e }}},
+ { 0x1e11, {1, {0x1e10 }}},
+ { 0x1e13, {1, {0x1e12 }}},
+ { 0x1e15, {1, {0x1e14 }}},
+ { 0x1e17, {1, {0x1e16 }}},
+ { 0x1e19, {1, {0x1e18 }}},
+ { 0x1e1b, {1, {0x1e1a }}},
+ { 0x1e1d, {1, {0x1e1c }}},
+ { 0x1e1f, {1, {0x1e1e }}},
+ { 0x1e21, {1, {0x1e20 }}},
+ { 0x1e23, {1, {0x1e22 }}},
+ { 0x1e25, {1, {0x1e24 }}},
+ { 0x1e27, {1, {0x1e26 }}},
+ { 0x1e29, {1, {0x1e28 }}},
+ { 0x1e2b, {1, {0x1e2a }}},
+ { 0x1e2d, {1, {0x1e2c }}},
+ { 0x1e2f, {1, {0x1e2e }}},
+ { 0x1e31, {1, {0x1e30 }}},
+ { 0x1e33, {1, {0x1e32 }}},
+ { 0x1e35, {1, {0x1e34 }}},
+ { 0x1e37, {1, {0x1e36 }}},
+ { 0x1e39, {1, {0x1e38 }}},
+ { 0x1e3b, {1, {0x1e3a }}},
+ { 0x1e3d, {1, {0x1e3c }}},
+ { 0x1e3f, {1, {0x1e3e }}},
+ { 0x1e41, {1, {0x1e40 }}},
+ { 0x1e43, {1, {0x1e42 }}},
+ { 0x1e45, {1, {0x1e44 }}},
+ { 0x1e47, {1, {0x1e46 }}},
+ { 0x1e49, {1, {0x1e48 }}},
+ { 0x1e4b, {1, {0x1e4a }}},
+ { 0x1e4d, {1, {0x1e4c }}},
+ { 0x1e4f, {1, {0x1e4e }}},
+ { 0x1e51, {1, {0x1e50 }}},
+ { 0x1e53, {1, {0x1e52 }}},
+ { 0x1e55, {1, {0x1e54 }}},
+ { 0x1e57, {1, {0x1e56 }}},
+ { 0x1e59, {1, {0x1e58 }}},
+ { 0x1e5b, {1, {0x1e5a }}},
+ { 0x1e5d, {1, {0x1e5c }}},
+ { 0x1e5f, {1, {0x1e5e }}},
+ { 0x1e61, {2, {0x1e9b, 0x1e60 }}},
+ { 0x1e63, {1, {0x1e62 }}},
+ { 0x1e65, {1, {0x1e64 }}},
+ { 0x1e67, {1, {0x1e66 }}},
+ { 0x1e69, {1, {0x1e68 }}},
+ { 0x1e6b, {1, {0x1e6a }}},
+ { 0x1e6d, {1, {0x1e6c }}},
+ { 0x1e6f, {1, {0x1e6e }}},
+ { 0x1e71, {1, {0x1e70 }}},
+ { 0x1e73, {1, {0x1e72 }}},
+ { 0x1e75, {1, {0x1e74 }}},
+ { 0x1e77, {1, {0x1e76 }}},
+ { 0x1e79, {1, {0x1e78 }}},
+ { 0x1e7b, {1, {0x1e7a }}},
+ { 0x1e7d, {1, {0x1e7c }}},
+ { 0x1e7f, {1, {0x1e7e }}},
+ { 0x1e81, {1, {0x1e80 }}},
+ { 0x1e83, {1, {0x1e82 }}},
+ { 0x1e85, {1, {0x1e84 }}},
+ { 0x1e87, {1, {0x1e86 }}},
+ { 0x1e89, {1, {0x1e88 }}},
+ { 0x1e8b, {1, {0x1e8a }}},
+ { 0x1e8d, {1, {0x1e8c }}},
+ { 0x1e8f, {1, {0x1e8e }}},
+ { 0x1e91, {1, {0x1e90 }}},
+ { 0x1e93, {1, {0x1e92 }}},
+ { 0x1e95, {1, {0x1e94 }}},
+ { 0x1ea1, {1, {0x1ea0 }}},
+ { 0x1ea3, {1, {0x1ea2 }}},
+ { 0x1ea5, {1, {0x1ea4 }}},
+ { 0x1ea7, {1, {0x1ea6 }}},
+ { 0x1ea9, {1, {0x1ea8 }}},
+ { 0x1eab, {1, {0x1eaa }}},
+ { 0x1ead, {1, {0x1eac }}},
+ { 0x1eaf, {1, {0x1eae }}},
+ { 0x1eb1, {1, {0x1eb0 }}},
+ { 0x1eb3, {1, {0x1eb2 }}},
+ { 0x1eb5, {1, {0x1eb4 }}},
+ { 0x1eb7, {1, {0x1eb6 }}},
+ { 0x1eb9, {1, {0x1eb8 }}},
+ { 0x1ebb, {1, {0x1eba }}},
+ { 0x1ebd, {1, {0x1ebc }}},
+ { 0x1ebf, {1, {0x1ebe }}},
+ { 0x1ec1, {1, {0x1ec0 }}},
+ { 0x1ec3, {1, {0x1ec2 }}},
+ { 0x1ec5, {1, {0x1ec4 }}},
+ { 0x1ec7, {1, {0x1ec6 }}},
+ { 0x1ec9, {1, {0x1ec8 }}},
+ { 0x1ecb, {1, {0x1eca }}},
+ { 0x1ecd, {1, {0x1ecc }}},
+ { 0x1ecf, {1, {0x1ece }}},
+ { 0x1ed1, {1, {0x1ed0 }}},
+ { 0x1ed3, {1, {0x1ed2 }}},
+ { 0x1ed5, {1, {0x1ed4 }}},
+ { 0x1ed7, {1, {0x1ed6 }}},
+ { 0x1ed9, {1, {0x1ed8 }}},
+ { 0x1edb, {1, {0x1eda }}},
+ { 0x1edd, {1, {0x1edc }}},
+ { 0x1edf, {1, {0x1ede }}},
+ { 0x1ee1, {1, {0x1ee0 }}},
+ { 0x1ee3, {1, {0x1ee2 }}},
+ { 0x1ee5, {1, {0x1ee4 }}},
+ { 0x1ee7, {1, {0x1ee6 }}},
+ { 0x1ee9, {1, {0x1ee8 }}},
+ { 0x1eeb, {1, {0x1eea }}},
+ { 0x1eed, {1, {0x1eec }}},
+ { 0x1eef, {1, {0x1eee }}},
+ { 0x1ef1, {1, {0x1ef0 }}},
+ { 0x1ef3, {1, {0x1ef2 }}},
+ { 0x1ef5, {1, {0x1ef4 }}},
+ { 0x1ef7, {1, {0x1ef6 }}},
+ { 0x1ef9, {1, {0x1ef8 }}},
+ { 0x1f00, {1, {0x1f08 }}},
+ { 0x1f01, {1, {0x1f09 }}},
+ { 0x1f02, {1, {0x1f0a }}},
+ { 0x1f03, {1, {0x1f0b }}},
+ { 0x1f04, {1, {0x1f0c }}},
+ { 0x1f05, {1, {0x1f0d }}},
+ { 0x1f06, {1, {0x1f0e }}},
+ { 0x1f07, {1, {0x1f0f }}},
+ { 0x1f10, {1, {0x1f18 }}},
+ { 0x1f11, {1, {0x1f19 }}},
+ { 0x1f12, {1, {0x1f1a }}},
+ { 0x1f13, {1, {0x1f1b }}},
+ { 0x1f14, {1, {0x1f1c }}},
+ { 0x1f15, {1, {0x1f1d }}},
+ { 0x1f20, {1, {0x1f28 }}},
+ { 0x1f21, {1, {0x1f29 }}},
+ { 0x1f22, {1, {0x1f2a }}},
+ { 0x1f23, {1, {0x1f2b }}},
+ { 0x1f24, {1, {0x1f2c }}},
+ { 0x1f25, {1, {0x1f2d }}},
+ { 0x1f26, {1, {0x1f2e }}},
+ { 0x1f27, {1, {0x1f2f }}},
+ { 0x1f30, {1, {0x1f38 }}},
+ { 0x1f31, {1, {0x1f39 }}},
+ { 0x1f32, {1, {0x1f3a }}},
+ { 0x1f33, {1, {0x1f3b }}},
+ { 0x1f34, {1, {0x1f3c }}},
+ { 0x1f35, {1, {0x1f3d }}},
+ { 0x1f36, {1, {0x1f3e }}},
+ { 0x1f37, {1, {0x1f3f }}},
+ { 0x1f40, {1, {0x1f48 }}},
+ { 0x1f41, {1, {0x1f49 }}},
+ { 0x1f42, {1, {0x1f4a }}},
+ { 0x1f43, {1, {0x1f4b }}},
+ { 0x1f44, {1, {0x1f4c }}},
+ { 0x1f45, {1, {0x1f4d }}},
+ { 0x1f51, {1, {0x1f59 }}},
+ { 0x1f53, {1, {0x1f5b }}},
+ { 0x1f55, {1, {0x1f5d }}},
+ { 0x1f57, {1, {0x1f5f }}},
+ { 0x1f60, {1, {0x1f68 }}},
+ { 0x1f61, {1, {0x1f69 }}},
+ { 0x1f62, {1, {0x1f6a }}},
+ { 0x1f63, {1, {0x1f6b }}},
+ { 0x1f64, {1, {0x1f6c }}},
+ { 0x1f65, {1, {0x1f6d }}},
+ { 0x1f66, {1, {0x1f6e }}},
+ { 0x1f67, {1, {0x1f6f }}},
+ { 0x1f70, {1, {0x1fba }}},
+ { 0x1f71, {1, {0x1fbb }}},
+ { 0x1f72, {1, {0x1fc8 }}},
+ { 0x1f73, {1, {0x1fc9 }}},
+ { 0x1f74, {1, {0x1fca }}},
+ { 0x1f75, {1, {0x1fcb }}},
+ { 0x1f76, {1, {0x1fda }}},
+ { 0x1f77, {1, {0x1fdb }}},
+ { 0x1f78, {1, {0x1ff8 }}},
+ { 0x1f79, {1, {0x1ff9 }}},
+ { 0x1f7a, {1, {0x1fea }}},
+ { 0x1f7b, {1, {0x1feb }}},
+ { 0x1f7c, {1, {0x1ffa }}},
+ { 0x1f7d, {1, {0x1ffb }}},
+ { 0x1fb0, {1, {0x1fb8 }}},
+ { 0x1fb1, {1, {0x1fb9 }}},
+ { 0x1fd0, {1, {0x1fd8 }}},
+ { 0x1fd1, {1, {0x1fd9 }}},
+ { 0x1fe0, {1, {0x1fe8 }}},
+ { 0x1fe1, {1, {0x1fe9 }}},
+ { 0x1fe5, {1, {0x1fec }}},
+ { 0x2170, {1, {0x2160 }}},
+ { 0x2171, {1, {0x2161 }}},
+ { 0x2172, {1, {0x2162 }}},
+ { 0x2173, {1, {0x2163 }}},
+ { 0x2174, {1, {0x2164 }}},
+ { 0x2175, {1, {0x2165 }}},
+ { 0x2176, {1, {0x2166 }}},
+ { 0x2177, {1, {0x2167 }}},
+ { 0x2178, {1, {0x2168 }}},
+ { 0x2179, {1, {0x2169 }}},
+ { 0x217a, {1, {0x216a }}},
+ { 0x217b, {1, {0x216b }}},
+ { 0x217c, {1, {0x216c }}},
+ { 0x217d, {1, {0x216d }}},
+ { 0x217e, {1, {0x216e }}},
+ { 0x217f, {1, {0x216f }}},
+ { 0x24d0, {1, {0x24b6 }}},
+ { 0x24d1, {1, {0x24b7 }}},
+ { 0x24d2, {1, {0x24b8 }}},
+ { 0x24d3, {1, {0x24b9 }}},
+ { 0x24d4, {1, {0x24ba }}},
+ { 0x24d5, {1, {0x24bb }}},
+ { 0x24d6, {1, {0x24bc }}},
+ { 0x24d7, {1, {0x24bd }}},
+ { 0x24d8, {1, {0x24be }}},
+ { 0x24d9, {1, {0x24bf }}},
+ { 0x24da, {1, {0x24c0 }}},
+ { 0x24db, {1, {0x24c1 }}},
+ { 0x24dc, {1, {0x24c2 }}},
+ { 0x24dd, {1, {0x24c3 }}},
+ { 0x24de, {1, {0x24c4 }}},
+ { 0x24df, {1, {0x24c5 }}},
+ { 0x24e0, {1, {0x24c6 }}},
+ { 0x24e1, {1, {0x24c7 }}},
+ { 0x24e2, {1, {0x24c8 }}},
+ { 0x24e3, {1, {0x24c9 }}},
+ { 0x24e4, {1, {0x24ca }}},
+ { 0x24e5, {1, {0x24cb }}},
+ { 0x24e6, {1, {0x24cc }}},
+ { 0x24e7, {1, {0x24cd }}},
+ { 0x24e8, {1, {0x24ce }}},
+ { 0x24e9, {1, {0x24cf }}},
+ { 0x2c30, {1, {0x2c00 }}},
+ { 0x2c31, {1, {0x2c01 }}},
+ { 0x2c32, {1, {0x2c02 }}},
+ { 0x2c33, {1, {0x2c03 }}},
+ { 0x2c34, {1, {0x2c04 }}},
+ { 0x2c35, {1, {0x2c05 }}},
+ { 0x2c36, {1, {0x2c06 }}},
+ { 0x2c37, {1, {0x2c07 }}},
+ { 0x2c38, {1, {0x2c08 }}},
+ { 0x2c39, {1, {0x2c09 }}},
+ { 0x2c3a, {1, {0x2c0a }}},
+ { 0x2c3b, {1, {0x2c0b }}},
+ { 0x2c3c, {1, {0x2c0c }}},
+ { 0x2c3d, {1, {0x2c0d }}},
+ { 0x2c3e, {1, {0x2c0e }}},
+ { 0x2c3f, {1, {0x2c0f }}},
+ { 0x2c40, {1, {0x2c10 }}},
+ { 0x2c41, {1, {0x2c11 }}},
+ { 0x2c42, {1, {0x2c12 }}},
+ { 0x2c43, {1, {0x2c13 }}},
+ { 0x2c44, {1, {0x2c14 }}},
+ { 0x2c45, {1, {0x2c15 }}},
+ { 0x2c46, {1, {0x2c16 }}},
+ { 0x2c47, {1, {0x2c17 }}},
+ { 0x2c48, {1, {0x2c18 }}},
+ { 0x2c49, {1, {0x2c19 }}},
+ { 0x2c4a, {1, {0x2c1a }}},
+ { 0x2c4b, {1, {0x2c1b }}},
+ { 0x2c4c, {1, {0x2c1c }}},
+ { 0x2c4d, {1, {0x2c1d }}},
+ { 0x2c4e, {1, {0x2c1e }}},
+ { 0x2c4f, {1, {0x2c1f }}},
+ { 0x2c50, {1, {0x2c20 }}},
+ { 0x2c51, {1, {0x2c21 }}},
+ { 0x2c52, {1, {0x2c22 }}},
+ { 0x2c53, {1, {0x2c23 }}},
+ { 0x2c54, {1, {0x2c24 }}},
+ { 0x2c55, {1, {0x2c25 }}},
+ { 0x2c56, {1, {0x2c26 }}},
+ { 0x2c57, {1, {0x2c27 }}},
+ { 0x2c58, {1, {0x2c28 }}},
+ { 0x2c59, {1, {0x2c29 }}},
+ { 0x2c5a, {1, {0x2c2a }}},
+ { 0x2c5b, {1, {0x2c2b }}},
+ { 0x2c5c, {1, {0x2c2c }}},
+ { 0x2c5d, {1, {0x2c2d }}},
+ { 0x2c5e, {1, {0x2c2e }}},
+ { 0x2c81, {1, {0x2c80 }}},
+ { 0x2c83, {1, {0x2c82 }}},
+ { 0x2c85, {1, {0x2c84 }}},
+ { 0x2c87, {1, {0x2c86 }}},
+ { 0x2c89, {1, {0x2c88 }}},
+ { 0x2c8b, {1, {0x2c8a }}},
+ { 0x2c8d, {1, {0x2c8c }}},
+ { 0x2c8f, {1, {0x2c8e }}},
+ { 0x2c91, {1, {0x2c90 }}},
+ { 0x2c93, {1, {0x2c92 }}},
+ { 0x2c95, {1, {0x2c94 }}},
+ { 0x2c97, {1, {0x2c96 }}},
+ { 0x2c99, {1, {0x2c98 }}},
+ { 0x2c9b, {1, {0x2c9a }}},
+ { 0x2c9d, {1, {0x2c9c }}},
+ { 0x2c9f, {1, {0x2c9e }}},
+ { 0x2ca1, {1, {0x2ca0 }}},
+ { 0x2ca3, {1, {0x2ca2 }}},
+ { 0x2ca5, {1, {0x2ca4 }}},
+ { 0x2ca7, {1, {0x2ca6 }}},
+ { 0x2ca9, {1, {0x2ca8 }}},
+ { 0x2cab, {1, {0x2caa }}},
+ { 0x2cad, {1, {0x2cac }}},
+ { 0x2caf, {1, {0x2cae }}},
+ { 0x2cb1, {1, {0x2cb0 }}},
+ { 0x2cb3, {1, {0x2cb2 }}},
+ { 0x2cb5, {1, {0x2cb4 }}},
+ { 0x2cb7, {1, {0x2cb6 }}},
+ { 0x2cb9, {1, {0x2cb8 }}},
+ { 0x2cbb, {1, {0x2cba }}},
+ { 0x2cbd, {1, {0x2cbc }}},
+ { 0x2cbf, {1, {0x2cbe }}},
+ { 0x2cc1, {1, {0x2cc0 }}},
+ { 0x2cc3, {1, {0x2cc2 }}},
+ { 0x2cc5, {1, {0x2cc4 }}},
+ { 0x2cc7, {1, {0x2cc6 }}},
+ { 0x2cc9, {1, {0x2cc8 }}},
+ { 0x2ccb, {1, {0x2cca }}},
+ { 0x2ccd, {1, {0x2ccc }}},
+ { 0x2ccf, {1, {0x2cce }}},
+ { 0x2cd1, {1, {0x2cd0 }}},
+ { 0x2cd3, {1, {0x2cd2 }}},
+ { 0x2cd5, {1, {0x2cd4 }}},
+ { 0x2cd7, {1, {0x2cd6 }}},
+ { 0x2cd9, {1, {0x2cd8 }}},
+ { 0x2cdb, {1, {0x2cda }}},
+ { 0x2cdd, {1, {0x2cdc }}},
+ { 0x2cdf, {1, {0x2cde }}},
+ { 0x2ce1, {1, {0x2ce0 }}},
+ { 0x2ce3, {1, {0x2ce2 }}},
+ { 0x2d00, {1, {0x10a0 }}},
+ { 0x2d01, {1, {0x10a1 }}},
+ { 0x2d02, {1, {0x10a2 }}},
+ { 0x2d03, {1, {0x10a3 }}},
+ { 0x2d04, {1, {0x10a4 }}},
+ { 0x2d05, {1, {0x10a5 }}},
+ { 0x2d06, {1, {0x10a6 }}},
+ { 0x2d07, {1, {0x10a7 }}},
+ { 0x2d08, {1, {0x10a8 }}},
+ { 0x2d09, {1, {0x10a9 }}},
+ { 0x2d0a, {1, {0x10aa }}},
+ { 0x2d0b, {1, {0x10ab }}},
+ { 0x2d0c, {1, {0x10ac }}},
+ { 0x2d0d, {1, {0x10ad }}},
+ { 0x2d0e, {1, {0x10ae }}},
+ { 0x2d0f, {1, {0x10af }}},
+ { 0x2d10, {1, {0x10b0 }}},
+ { 0x2d11, {1, {0x10b1 }}},
+ { 0x2d12, {1, {0x10b2 }}},
+ { 0x2d13, {1, {0x10b3 }}},
+ { 0x2d14, {1, {0x10b4 }}},
+ { 0x2d15, {1, {0x10b5 }}},
+ { 0x2d16, {1, {0x10b6 }}},
+ { 0x2d17, {1, {0x10b7 }}},
+ { 0x2d18, {1, {0x10b8 }}},
+ { 0x2d19, {1, {0x10b9 }}},
+ { 0x2d1a, {1, {0x10ba }}},
+ { 0x2d1b, {1, {0x10bb }}},
+ { 0x2d1c, {1, {0x10bc }}},
+ { 0x2d1d, {1, {0x10bd }}},
+ { 0x2d1e, {1, {0x10be }}},
+ { 0x2d1f, {1, {0x10bf }}},
+ { 0x2d20, {1, {0x10c0 }}},
+ { 0x2d21, {1, {0x10c1 }}},
+ { 0x2d22, {1, {0x10c2 }}},
+ { 0x2d23, {1, {0x10c3 }}},
+ { 0x2d24, {1, {0x10c4 }}},
+ { 0x2d25, {1, {0x10c5 }}},
+ { 0xff41, {1, {0xff21 }}},
+ { 0xff42, {1, {0xff22 }}},
+ { 0xff43, {1, {0xff23 }}},
+ { 0xff44, {1, {0xff24 }}},
+ { 0xff45, {1, {0xff25 }}},
+ { 0xff46, {1, {0xff26 }}},
+ { 0xff47, {1, {0xff27 }}},
+ { 0xff48, {1, {0xff28 }}},
+ { 0xff49, {1, {0xff29 }}},
+ { 0xff4a, {1, {0xff2a }}},
+ { 0xff4b, {1, {0xff2b }}},
+ { 0xff4c, {1, {0xff2c }}},
+ { 0xff4d, {1, {0xff2d }}},
+ { 0xff4e, {1, {0xff2e }}},
+ { 0xff4f, {1, {0xff2f }}},
+ { 0xff50, {1, {0xff30 }}},
+ { 0xff51, {1, {0xff31 }}},
+ { 0xff52, {1, {0xff32 }}},
+ { 0xff53, {1, {0xff33 }}},
+ { 0xff54, {1, {0xff34 }}},
+ { 0xff55, {1, {0xff35 }}},
+ { 0xff56, {1, {0xff36 }}},
+ { 0xff57, {1, {0xff37 }}},
+ { 0xff58, {1, {0xff38 }}},
+ { 0xff59, {1, {0xff39 }}},
+ { 0xff5a, {1, {0xff3a }}},
+ { 0x10428, {1, {0x10400 }}},
+ { 0x10429, {1, {0x10401 }}},
+ { 0x1042a, {1, {0x10402 }}},
+ { 0x1042b, {1, {0x10403 }}},
+ { 0x1042c, {1, {0x10404 }}},
+ { 0x1042d, {1, {0x10405 }}},
+ { 0x1042e, {1, {0x10406 }}},
+ { 0x1042f, {1, {0x10407 }}},
+ { 0x10430, {1, {0x10408 }}},
+ { 0x10431, {1, {0x10409 }}},
+ { 0x10432, {1, {0x1040a }}},
+ { 0x10433, {1, {0x1040b }}},
+ { 0x10434, {1, {0x1040c }}},
+ { 0x10435, {1, {0x1040d }}},
+ { 0x10436, {1, {0x1040e }}},
+ { 0x10437, {1, {0x1040f }}},
+ { 0x10438, {1, {0x10410 }}},
+ { 0x10439, {1, {0x10411 }}},
+ { 0x1043a, {1, {0x10412 }}},
+ { 0x1043b, {1, {0x10413 }}},
+ { 0x1043c, {1, {0x10414 }}},
+ { 0x1043d, {1, {0x10415 }}},
+ { 0x1043e, {1, {0x10416 }}},
+ { 0x1043f, {1, {0x10417 }}},
+ { 0x10440, {1, {0x10418 }}},
+ { 0x10441, {1, {0x10419 }}},
+ { 0x10442, {1, {0x1041a }}},
+ { 0x10443, {1, {0x1041b }}},
+ { 0x10444, {1, {0x1041c }}},
+ { 0x10445, {1, {0x1041d }}},
+ { 0x10446, {1, {0x1041e }}},
+ { 0x10447, {1, {0x1041f }}},
+ { 0x10448, {1, {0x10420 }}},
+ { 0x10449, {1, {0x10421 }}},
+ { 0x1044a, {1, {0x10422 }}},
+ { 0x1044b, {1, {0x10423 }}},
+ { 0x1044c, {1, {0x10424 }}},
+ { 0x1044d, {1, {0x10425 }}},
+ { 0x1044e, {1, {0x10426 }}},
+ { 0x1044f, {1, {0x10427 }}}
+};
+
+/*
+ * Single-code-point unfold entry kept apart from CaseUnfold_11:
+ * U+0069 is associated with U+0049.
+ * init_case_fold_table() always loads this table into Unfold1Table, while
+ * onigenc_unicode_apply_all_case_fold() enumerates it only when the
+ * Turkish/Azeri fold flag is not in effect.
+ */
+static const CaseUnfold_11_Type CaseUnfold_11_Locale[] = {
+ { 0x0069, {1, {0x0049 }}}
+};
+
+/*
+ * Unfold table for two-code-point sequences.
+ * Each entry is { {from[0], from[1]}, {n, {to...}} }: `from` is a sequence
+ * of two code points and `to` lists the n single code points associated
+ * with that sequence.  init_case_fold_table() loads the entries into
+ * Unfold2Table keyed by `from`.
+ */
+static const CaseUnfold_12_Type CaseUnfold_12[] = {
+ { {0x0061, 0x02be}, {1, {0x1e9a }}},
+ { {0x0066, 0x0066}, {1, {0xfb00 }}},
+ { {0x0066, 0x0069}, {1, {0xfb01 }}},
+ { {0x0066, 0x006c}, {1, {0xfb02 }}},
+ { {0x0068, 0x0331}, {1, {0x1e96 }}},
+ { {0x006a, 0x030c}, {1, {0x01f0 }}},
+ { {0x0073, 0x0073}, {1, {0x00df }}},
+ { {0x0073, 0x0074}, {2, {0xfb05, 0xfb06 }}},
+ { {0x0074, 0x0308}, {1, {0x1e97 }}},
+ { {0x0077, 0x030a}, {1, {0x1e98 }}},
+ { {0x0079, 0x030a}, {1, {0x1e99 }}},
+ { {0x02bc, 0x006e}, {1, {0x0149 }}},
+ { {0x03ac, 0x03b9}, {1, {0x1fb4 }}},
+ { {0x03ae, 0x03b9}, {1, {0x1fc4 }}},
+ { {0x03b1, 0x0342}, {1, {0x1fb6 }}},
+ { {0x03b1, 0x03b9}, {2, {0x1fb3, 0x1fbc }}},
+ { {0x03b7, 0x0342}, {1, {0x1fc6 }}},
+ { {0x03b7, 0x03b9}, {2, {0x1fc3, 0x1fcc }}},
+ { {0x03b9, 0x0342}, {1, {0x1fd6 }}},
+ { {0x03c1, 0x0313}, {1, {0x1fe4 }}},
+ { {0x03c5, 0x0313}, {1, {0x1f50 }}},
+ { {0x03c5, 0x0342}, {1, {0x1fe6 }}},
+ { {0x03c9, 0x0342}, {1, {0x1ff6 }}},
+ { {0x03c9, 0x03b9}, {2, {0x1ff3, 0x1ffc }}},
+ { {0x03ce, 0x03b9}, {1, {0x1ff4 }}},
+ { {0x0565, 0x0582}, {1, {0x0587 }}},
+ { {0x0574, 0x0565}, {1, {0xfb14 }}},
+ { {0x0574, 0x056b}, {1, {0xfb15 }}},
+ { {0x0574, 0x056d}, {1, {0xfb17 }}},
+ { {0x0574, 0x0576}, {1, {0xfb13 }}},
+ { {0x057e, 0x0576}, {1, {0xfb16 }}},
+ { {0x1f00, 0x03b9}, {2, {0x1f88, 0x1f80 }}},
+ { {0x1f01, 0x03b9}, {2, {0x1f81, 0x1f89 }}},
+ { {0x1f02, 0x03b9}, {2, {0x1f82, 0x1f8a }}},
+ { {0x1f03, 0x03b9}, {2, {0x1f83, 0x1f8b }}},
+ { {0x1f04, 0x03b9}, {2, {0x1f84, 0x1f8c }}},
+ { {0x1f05, 0x03b9}, {2, {0x1f85, 0x1f8d }}},
+ { {0x1f06, 0x03b9}, {2, {0x1f86, 0x1f8e }}},
+ { {0x1f07, 0x03b9}, {2, {0x1f87, 0x1f8f }}},
+ { {0x1f20, 0x03b9}, {2, {0x1f90, 0x1f98 }}},
+ { {0x1f21, 0x03b9}, {2, {0x1f91, 0x1f99 }}},
+ { {0x1f22, 0x03b9}, {2, {0x1f92, 0x1f9a }}},
+ { {0x1f23, 0x03b9}, {2, {0x1f93, 0x1f9b }}},
+ { {0x1f24, 0x03b9}, {2, {0x1f94, 0x1f9c }}},
+ { {0x1f25, 0x03b9}, {2, {0x1f95, 0x1f9d }}},
+ { {0x1f26, 0x03b9}, {2, {0x1f96, 0x1f9e }}},
+ { {0x1f27, 0x03b9}, {2, {0x1f97, 0x1f9f }}},
+ { {0x1f60, 0x03b9}, {2, {0x1fa0, 0x1fa8 }}},
+ { {0x1f61, 0x03b9}, {2, {0x1fa1, 0x1fa9 }}},
+ { {0x1f62, 0x03b9}, {2, {0x1fa2, 0x1faa }}},
+ { {0x1f63, 0x03b9}, {2, {0x1fa3, 0x1fab }}},
+ { {0x1f64, 0x03b9}, {2, {0x1fa4, 0x1fac }}},
+ { {0x1f65, 0x03b9}, {2, {0x1fa5, 0x1fad }}},
+ { {0x1f66, 0x03b9}, {2, {0x1fa6, 0x1fae }}},
+ { {0x1f67, 0x03b9}, {2, {0x1fa7, 0x1faf }}},
+ { {0x1f70, 0x03b9}, {1, {0x1fb2 }}},
+ { {0x1f74, 0x03b9}, {1, {0x1fc2 }}},
+ { {0x1f7c, 0x03b9}, {1, {0x1ff2 }}}
+};
+
+/*
+ * Two-code-point unfold entry kept apart from CaseUnfold_12:
+ * the sequence U+0069 U+0307 is associated with U+0130.
+ * init_case_fold_table() always loads it into Unfold2Table, while
+ * onigenc_unicode_apply_all_case_fold() enumerates it only when the
+ * Turkish/Azeri fold flag is not in effect.
+ */
+static const CaseUnfold_12_Type CaseUnfold_12_Locale[] = {
+ { {0x0069, 0x0307}, {1, {0x0130 }}}
+};
+
+/*
+ * Unfold table for three-code-point sequences.
+ * Each entry is { {from[0], from[1], from[2]}, {n, {to...}} }: `to` lists
+ * the n single code points associated with the sequence `from`.
+ * init_case_fold_table() loads the entries into Unfold3Table keyed by
+ * `from`.
+ */
+static const CaseUnfold_13_Type CaseUnfold_13[] = {
+ { {0x0066, 0x0066, 0x0069}, {1, {0xfb03 }}},
+ { {0x0066, 0x0066, 0x006c}, {1, {0xfb04 }}},
+ { {0x03b1, 0x0342, 0x03b9}, {1, {0x1fb7 }}},
+ { {0x03b7, 0x0342, 0x03b9}, {1, {0x1fc7 }}},
+ { {0x03b9, 0x0308, 0x0300}, {1, {0x1fd2 }}},
+ { {0x03b9, 0x0308, 0x0301}, {2, {0x0390, 0x1fd3 }}},
+ { {0x03b9, 0x0308, 0x0342}, {1, {0x1fd7 }}},
+ { {0x03c5, 0x0308, 0x0300}, {1, {0x1fe2 }}},
+ { {0x03c5, 0x0308, 0x0301}, {2, {0x03b0, 0x1fe3 }}},
+ { {0x03c5, 0x0308, 0x0342}, {1, {0x1fe7 }}},
+ { {0x03c5, 0x0313, 0x0300}, {1, {0x1f52 }}},
+ { {0x03c5, 0x0313, 0x0301}, {1, {0x1f54 }}},
+ { {0x03c5, 0x0313, 0x0342}, {1, {0x1f56 }}},
+ { {0x03c9, 0x0342, 0x03b9}, {1, {0x1ff7 }}}
+};
+
+
+/*
+ * Property-name table: each entry is { name, ctype index, name length }.
+ * init_name_ctype_table() inserts every entry into NameCtypeTable, stopping
+ * at the terminating entry whose name is NULL.  The ctype indexes run in the
+ * same order as the CodeRanges[] slots filled by init_code_range_array().
+ * Entries from "Any" onward exist only when USE_UNICODE_PROPERTIES is defined.
+ */
+static PosixBracketEntryType HashEntryData[] = {
+  { (UChar* )"NEWLINE",                0,  7 },
+  { (UChar* )"Alpha",                  1,  5 },
+  { (UChar* )"Blank",                  2,  5 },
+  { (UChar* )"Cntrl",                  3,  5 },
+  { (UChar* )"Digit",                  4,  5 },
+  { (UChar* )"Graph",                  5,  5 },
+  { (UChar* )"Lower",                  6,  5 },
+  { (UChar* )"Print",                  7,  5 },
+  { (UChar* )"Punct",                  8,  5 },
+  { (UChar* )"Space",                  9,  5 },
+  { (UChar* )"Upper",                 10,  5 },
+  { (UChar* )"XDigit",                11,  6 },
+  { (UChar* )"Word",                  12,  4 },
+  { (UChar* )"Alnum",                 13,  5 },
+  { (UChar* )"ASCII",                 14,  5 },
+
+#ifdef USE_UNICODE_PROPERTIES
+  { (UChar* )"Any",                   15,  3 },
+  { (UChar* )"Assigned",              16,  8 },
+  { (UChar* )"C",                     17,  1 },
+  { (UChar* )"Cc",                    18,  2 },
+  { (UChar* )"Cf",                    19,  2 },
+  { (UChar* )"Cn",                    20,  2 },
+  { (UChar* )"Co",                    21,  2 },
+  { (UChar* )"Cs",                    22,  2 },
+  { (UChar* )"L",                     23,  1 },
+  { (UChar* )"Ll",                    24,  2 },
+  { (UChar* )"Lm",                    25,  2 },
+  { (UChar* )"Lo",                    26,  2 },
+  { (UChar* )"Lt",                    27,  2 },
+  { (UChar* )"Lu",                    28,  2 },
+  { (UChar* )"M",                     29,  1 },
+  { (UChar* )"Mc",                    30,  2 },
+  { (UChar* )"Me",                    31,  2 },
+  { (UChar* )"Mn",                    32,  2 },
+  { (UChar* )"N",                     33,  1 },
+  { (UChar* )"Nd",                    34,  2 },
+  { (UChar* )"Nl",                    35,  2 },
+  { (UChar* )"No",                    36,  2 },
+  { (UChar* )"P",                     37,  1 },
+  { (UChar* )"Pc",                    38,  2 },
+  { (UChar* )"Pd",                    39,  2 },
+  { (UChar* )"Pe",                    40,  2 },
+  { (UChar* )"Pf",                    41,  2 },
+  { (UChar* )"Pi",                    42,  2 },
+  { (UChar* )"Po",                    43,  2 },
+  { (UChar* )"Ps",                    44,  2 },
+  { (UChar* )"S",                     45,  1 },
+  { (UChar* )"Sc",                    46,  2 },
+  { (UChar* )"Sk",                    47,  2 },
+  { (UChar* )"Sm",                    48,  2 },
+  { (UChar* )"So",                    49,  2 },
+  { (UChar* )"Z",                     50,  1 },
+  { (UChar* )"Zl",                    51,  2 },
+  { (UChar* )"Zp",                    52,  2 },
+  { (UChar* )"Zs",                    53,  2 },
+  { (UChar* )"Arabic",                54,  6 },
+  { (UChar* )"Armenian",              55,  8 },
+  { (UChar* )"Bengali",               56,  7 },
+  { (UChar* )"Bopomofo",              57,  8 },
+  { (UChar* )"Braille",               58,  7 },
+  { (UChar* )"Buginese",              59,  8 },
+  { (UChar* )"Buhid",                 60,  5 },
+  { (UChar* )"Canadian_Aboriginal",   61, 19 },
+  { (UChar* )"Cherokee",              62,  8 },
+  { (UChar* )"Common",                63,  6 },
+  { (UChar* )"Coptic",                64,  6 },
+  { (UChar* )"Cypriot",               65,  7 },
+  { (UChar* )"Cyrillic",              66,  8 },
+  { (UChar* )"Deseret",               67,  7 },
+  { (UChar* )"Devanagari",            68, 10 },
+  { (UChar* )"Ethiopic",              69,  8 },
+  { (UChar* )"Georgian",              70,  8 },
+  { (UChar* )"Glagolitic",            71, 10 },
+  { (UChar* )"Gothic",                72,  6 },
+  { (UChar* )"Greek",                 73,  5 },
+  { (UChar* )"Gujarati",              74,  8 },
+  { (UChar* )"Gurmukhi",              75,  8 },
+  { (UChar* )"Han",                   76,  3 },
+  { (UChar* )"Hangul",                77,  6 },
+  { (UChar* )"Hanunoo",               78,  7 },
+  { (UChar* )"Hebrew",                79,  6 },
+  { (UChar* )"Hiragana",              80,  8 },
+  { (UChar* )"Inherited",             81,  9 },
+  { (UChar* )"Kannada",               82,  7 },
+  { (UChar* )"Katakana",              83,  8 },
+  { (UChar* )"Kharoshthi",            84, 10 },
+  { (UChar* )"Khmer",                 85,  5 },
+  { (UChar* )"Lao",                   86,  3 },
+  { (UChar* )"Latin",                 87,  5 },
+  { (UChar* )"Limbu",                 88,  5 },
+  { (UChar* )"Linear_B",              89,  8 },
+  { (UChar* )"Malayalam",             90,  9 },
+  { (UChar* )"Mongolian",             91,  9 },
+  { (UChar* )"Myanmar",               92,  7 },
+  { (UChar* )"New_Tai_Lue",           93, 11 },
+  { (UChar* )"Ogham",                 94,  5 },
+  { (UChar* )"Old_Italic",            95, 10 },
+  { (UChar* )"Old_Persian",           96, 11 },
+  { (UChar* )"Oriya",                 97,  5 },
+  { (UChar* )"Osmanya",               98,  7 },
+  { (UChar* )"Runic",                 99,  5 },
+  { (UChar* )"Shavian",              100,  7 },
+  { (UChar* )"Sinhala",              101,  7 },
+  { (UChar* )"Syloti_Nagri",         102, 12 },
+  { (UChar* )"Syriac",               103,  6 },
+  { (UChar* )"Tagalog",              104,  7 },
+  { (UChar* )"Tagbanwa",             105,  8 },
+  { (UChar* )"Tai_Le",               106,  6 },
+  { (UChar* )"Tamil",                107,  5 },
+  { (UChar* )"Telugu",               108,  6 },
+  { (UChar* )"Thaana",               109,  6 },
+  { (UChar* )"Thai",                 110,  4 },
+  { (UChar* )"Tibetan",              111,  7 },
+  { (UChar* )"Tifinagh",             112,  8 },
+  { (UChar* )"Ugaritic",             113,  8 },
+  { (UChar* )"Yi",                   114,  2 },
+#endif /* USE_UNICODE_PROPERTIES */
+  { (UChar* )NULL,                    -1,  0 }
+};
+
+/* Number of slots in CodeRanges[]: 15 standard ctypes (indexes 0..14), plus
+   100 property/script entries (indexes 15..114) when USE_UNICODE_PROPERTIES
+   is defined.  Also used as the exclusive upper bound for ctype checks. */
+#ifdef USE_UNICODE_PROPERTIES
+#define CODE_RANGES_NUM     115
+#else
+#define CODE_RANGES_NUM      15
+#endif
+
+/* ctype index -> code range table; filled by init_code_range_array(). */
+static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM];
+/* Set to 1 by init_code_range_array() once CodeRanges[] has been filled. */
+static int CodeRangeTableInited = 0;
+
+/*
+ * Fill CodeRanges[] so that slot i points at the CR_* table for ctype i,
+ * then set CodeRangeTableInited.  The whole body runs between
+ * THREAD_ATOMIC_START and THREAD_ATOMIC_END.  Every store assigns a fixed
+ * CR_* name, so running the function again rewrites the same values.
+ * Slots 15..114 are filled only when USE_UNICODE_PROPERTIES is defined.
+ */
+static void init_code_range_array(void) {
+  THREAD_ATOMIC_START;
+
+  CodeRanges[0] = CR_NEWLINE;
+  CodeRanges[1] = CR_Alpha;
+  CodeRanges[2] = CR_Blank;
+  CodeRanges[3] = CR_Cntrl;
+  CodeRanges[4] = CR_Digit;
+  CodeRanges[5] = CR_Graph;
+  CodeRanges[6] = CR_Lower;
+  CodeRanges[7] = CR_Print;
+  CodeRanges[8] = CR_Punct;
+  CodeRanges[9] = CR_Space;
+  CodeRanges[10] = CR_Upper;
+  CodeRanges[11] = CR_XDigit;
+  CodeRanges[12] = CR_Word;
+  CodeRanges[13] = CR_Alnum;
+  CodeRanges[14] = CR_ASCII;
+
+#ifdef USE_UNICODE_PROPERTIES
+  CodeRanges[15] = CR_Any;
+  CodeRanges[16] = CR_Assigned;
+  CodeRanges[17] = CR_C;
+  CodeRanges[18] = CR_Cc;
+  CodeRanges[19] = CR_Cf;
+  CodeRanges[20] = CR_Cn;
+  CodeRanges[21] = CR_Co;
+  CodeRanges[22] = CR_Cs;
+  CodeRanges[23] = CR_L;
+  CodeRanges[24] = CR_Ll;
+  CodeRanges[25] = CR_Lm;
+  CodeRanges[26] = CR_Lo;
+  CodeRanges[27] = CR_Lt;
+  CodeRanges[28] = CR_Lu;
+  CodeRanges[29] = CR_M;
+  CodeRanges[30] = CR_Mc;
+  CodeRanges[31] = CR_Me;
+  CodeRanges[32] = CR_Mn;
+  CodeRanges[33] = CR_N;
+  CodeRanges[34] = CR_Nd;
+  CodeRanges[35] = CR_Nl;
+  CodeRanges[36] = CR_No;
+  CodeRanges[37] = CR_P;
+  CodeRanges[38] = CR_Pc;
+  CodeRanges[39] = CR_Pd;
+  CodeRanges[40] = CR_Pe;
+  CodeRanges[41] = CR_Pf;
+  CodeRanges[42] = CR_Pi;
+  CodeRanges[43] = CR_Po;
+  CodeRanges[44] = CR_Ps;
+  CodeRanges[45] = CR_S;
+  CodeRanges[46] = CR_Sc;
+  CodeRanges[47] = CR_Sk;
+  CodeRanges[48] = CR_Sm;
+  CodeRanges[49] = CR_So;
+  CodeRanges[50] = CR_Z;
+  CodeRanges[51] = CR_Zl;
+  CodeRanges[52] = CR_Zp;
+  CodeRanges[53] = CR_Zs;
+  CodeRanges[54] = CR_Arabic;
+  CodeRanges[55] = CR_Armenian;
+  CodeRanges[56] = CR_Bengali;
+  CodeRanges[57] = CR_Bopomofo;
+  CodeRanges[58] = CR_Braille;
+  CodeRanges[59] = CR_Buginese;
+  CodeRanges[60] = CR_Buhid;
+  CodeRanges[61] = CR_Canadian_Aboriginal;
+  CodeRanges[62] = CR_Cherokee;
+  CodeRanges[63] = CR_Common;
+  CodeRanges[64] = CR_Coptic;
+  CodeRanges[65] = CR_Cypriot;
+  CodeRanges[66] = CR_Cyrillic;
+  CodeRanges[67] = CR_Deseret;
+  CodeRanges[68] = CR_Devanagari;
+  CodeRanges[69] = CR_Ethiopic;
+  CodeRanges[70] = CR_Georgian;
+  CodeRanges[71] = CR_Glagolitic;
+  CodeRanges[72] = CR_Gothic;
+  CodeRanges[73] = CR_Greek;
+  CodeRanges[74] = CR_Gujarati;
+  CodeRanges[75] = CR_Gurmukhi;
+  CodeRanges[76] = CR_Han;
+  CodeRanges[77] = CR_Hangul;
+  CodeRanges[78] = CR_Hanunoo;
+  CodeRanges[79] = CR_Hebrew;
+  CodeRanges[80] = CR_Hiragana;
+  CodeRanges[81] = CR_Inherited;
+  CodeRanges[82] = CR_Kannada;
+  CodeRanges[83] = CR_Katakana;
+  CodeRanges[84] = CR_Kharoshthi;
+  CodeRanges[85] = CR_Khmer;
+  CodeRanges[86] = CR_Lao;
+  CodeRanges[87] = CR_Latin;
+  CodeRanges[88] = CR_Limbu;
+  CodeRanges[89] = CR_Linear_B;
+  CodeRanges[90] = CR_Malayalam;
+  CodeRanges[91] = CR_Mongolian;
+  CodeRanges[92] = CR_Myanmar;
+  CodeRanges[93] = CR_New_Tai_Lue;
+  CodeRanges[94] = CR_Ogham;
+  CodeRanges[95] = CR_Old_Italic;
+  CodeRanges[96] = CR_Old_Persian;
+  CodeRanges[97] = CR_Oriya;
+  CodeRanges[98] = CR_Osmanya;
+  CodeRanges[99] = CR_Runic;
+  CodeRanges[100] = CR_Shavian;
+  CodeRanges[101] = CR_Sinhala;
+  CodeRanges[102] = CR_Syloti_Nagri;
+  CodeRanges[103] = CR_Syriac;
+  CodeRanges[104] = CR_Tagalog;
+  CodeRanges[105] = CR_Tagbanwa;
+  CodeRanges[106] = CR_Tai_Le;
+  CodeRanges[107] = CR_Tamil;
+  CodeRanges[108] = CR_Telugu;
+  CodeRanges[109] = CR_Thaana;
+  CodeRanges[110] = CR_Thai;
+  CodeRanges[111] = CR_Tibetan;
+  CodeRanges[112] = CR_Tifinagh;
+  CodeRanges[113] = CR_Ugaritic;
+  CodeRanges[114] = CR_Yi;
+#endif /* USE_UNICODE_PROPERTIES */
+
+  /* Publish the table only after every slot has been written. */
+  CodeRangeTableInited = 1;
+  THREAD_ATOMIC_END;
+}
+
+/*
+ * Test whether code point `code` belongs to character type `ctype`.
+ *
+ * Code points below 256 are answered through
+ * ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE (when USE_UNICODE_PROPERTIES is
+ * defined, only for ctype <= ONIGENC_MAX_STD_CTYPE).  Everything else is
+ * looked up in the CodeRanges[] table, which is initialized on first use.
+ *
+ * Returns the result of the membership test, or ONIGERR_TYPE_BUG when
+ * `ctype` is not a valid CodeRanges[] index.
+ */
+extern int
+onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+  const OnigCodePoint* range;
+
+#ifdef USE_UNICODE_PROPERTIES
+  if (ctype <= ONIGENC_MAX_STD_CTYPE && code < 256)
+#else
+  if (code < 256)
+#endif
+  {
+    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+  }
+
+  if (ctype >= CODE_RANGES_NUM)
+    return ONIGERR_TYPE_BUG;
+
+  if (CodeRangeTableInited == 0)
+    init_code_range_array();
+
+  range = CodeRanges[ctype];
+  return onig_is_in_code_range((UChar* )range, code);
+}
+
+
+/*
+ * Fetch the code range table for character type `ctype`.
+ *
+ * ctype:  index into CodeRanges[] (0 .. CODE_RANGES_NUM - 1).
+ * ranges: out parameter; *ranges receives the table pointer on success.
+ *
+ * Returns 0 on success, or ONIGERR_TYPE_BUG when `ctype` is out of range
+ * (in which case *ranges is left untouched).
+ */
+extern int
+onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
+{
+  /* ctype is a signed int, so the lower bound has to be checked too:
+     a negative value would otherwise index before the start of CodeRanges. */
+  if (ctype < 0 || ctype >= CODE_RANGES_NUM) {
+    return ONIGERR_TYPE_BUG;
+  }
+
+  if (CodeRangeTableInited == 0) init_code_range_array();
+
+  *ranges = CodeRanges[ctype];
+
+  return 0;
+}
+
+/*
+ * ctype code range getter shared by the UTF-16/UTF-32 encodings (per the
+ * function name): stores 0 in *sb_out and forwards the table lookup to
+ * onigenc_unicode_ctype_code_range(), whose status it returns.
+ */
+extern int
+onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
+                                      const OnigCodePoint* ranges[])
+{
+  int status;
+
+  *sb_out = 0x00;
+  status = onigenc_unicode_ctype_code_range(ctype, ranges);
+
+  return status;
+}
+
+#include "st.h"
+
+/* Size of the buffer that onigenc_unicode_property_name_to_ctype() copies a
+   property name into; names that would fill it completely are rejected. */
+#define PROPERTY_NAME_MAX_SIZE    20
+
+/* Property name -> ctype hash table, built by init_name_ctype_table(). */
+static st_table* NameCtypeTable;
+/* Set to 1 by init_name_ctype_table() once NameCtypeTable has been filled. */
+static int NameTableInited = 0;
+
+/*
+ * Build NameCtypeTable from HashEntryData (property name -> ctype index).
+ *
+ * Returns 0 on success (including when the table was already built) or
+ * ONIGERR_MEMORY when the hash table cannot be allocated.  Every exit path
+ * passes through THREAD_ATOMIC_END so the section opened by
+ * THREAD_ATOMIC_START is always closed.
+ */
+static int init_name_ctype_table(void)
+{
+  PosixBracketEntryType *pb;
+  st_table *table;
+  int r = 0;
+
+  THREAD_ATOMIC_START;
+
+  /* Callers test NameTableInited before entering the atomic section, so
+     re-check it here: another caller may have finished the table already,
+     and building a second one would leak the first. */
+  if (NameTableInited == 0) {
+    table = onig_st_init_strend_table_with_size(100);
+    if (ONIG_IS_NULL(table)) {
+      r = ONIGERR_MEMORY;
+    }
+    else {
+      for (pb = HashEntryData; ONIG_IS_NOT_NULL(pb->name); pb++) {
+        onig_st_insert_strend(table, pb->name, pb->name + pb->len,
+                              (st_data_t )pb->ctype);
+      }
+
+      /* Publish the table only once it is fully populated. */
+      NameCtypeTable = table;
+      NameTableInited = 1;
+    }
+  }
+
+  THREAD_ATOMIC_END;
+  return r;
+}
+
+/*
+ * Translate a property name, given as the byte range [name, end) in
+ * encoding `enc`, into its ctype index.
+ *
+ * The name is decoded one character at a time into a local byte buffer;
+ * any code point >= 0x80, or a name of PROPERTY_NAME_MAX_SIZE characters
+ * or more, is rejected.  The buffer is then looked up in NameCtypeTable,
+ * which is built on first use.
+ *
+ * Returns the ctype index on success,
+ * ONIGERR_INVALID_CHAR_PROPERTY_NAME when the name is not acceptable or
+ * not found, or the error from init_name_ctype_table() (ONIGERR_MEMORY)
+ * when the lookup table cannot be built.
+ */
+extern int
+onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end)
+{
+  int len;
+  int r;
+  hash_data_type ctype;
+  UChar buf[PROPERTY_NAME_MAX_SIZE];
+  UChar *p;
+  OnigCodePoint code;
+
+  p = name;
+  len = 0;
+  while (p < end) {
+    code = ONIGENC_MBC_TO_CODE(enc, p, end);
+    if (code >= 0x80)
+      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+    buf[len++] = (UChar )code;
+    /* Keep one byte free for the terminating 0 written below. */
+    if (len >= PROPERTY_NAME_MAX_SIZE)
+      return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+
+    p += enclen(enc, p);
+  }
+
+  buf[len] = 0;
+
+  if (NameTableInited == 0) {
+    /* Do not fall through to the lookup with an unbuilt table. */
+    r = init_name_ctype_table();
+    if (r != 0) return r;
+  }
+
+  if (onig_st_lookup_strend(NameCtypeTable, buf, buf + len, &ctype) == 0) {
+    return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+  }
+
+  return (int )ctype;
+}
+
+
+/* Key comparison for two-code-point keys: 0 when both elements of x and y
+   are equal, 1 otherwise. */
+static int
+code2_cmp(OnigCodePoint* x, OnigCodePoint* y)
+{
+  int i;
+
+  for (i = 0; i < 2; i++) {
+    if (x[i] != y[i]) return 1;
+  }
+  return 0;
+}
+
+/* Hash for two-code-point keys: the sum of both elements, converted to int. */
+static int
+code2_hash(OnigCodePoint* x)
+{
+  OnigCodePoint sum;
+
+  sum = x[0];
+  sum += x[1];
+  return (int )sum;
+}
+
+/* st hash type for two-code-point keys (compare function, then hash
+   function); passed to st_init_table_with_size() when Unfold2Table is
+   created. */
+static struct st_hash_type type_code2_hash = {
+  code2_cmp,
+  code2_hash,
+};
+
+/* Key comparison for three-code-point keys: 0 when all three elements of
+   x and y are equal, 1 otherwise. */
+static int
+code3_cmp(OnigCodePoint* x, OnigCodePoint* y)
+{
+  int i;
+
+  for (i = 0; i < 3; i++) {
+    if (x[i] != y[i]) return 1;
+  }
+  return 0;
+}
+
+/* Hash for three-code-point keys: the sum of all three elements, converted
+   to int. */
+static int
+code3_hash(OnigCodePoint* x)
+{
+  OnigCodePoint sum;
+
+  sum = x[0];
+  sum += x[1];
+  sum += x[2];
+  return (int )sum;
+}
+
+/* st hash type for three-code-point keys (compare function, then hash
+   function); passed to st_init_table_with_size() when Unfold3Table is
+   created. */
+static struct st_hash_type type_code3_hash = {
+  code3_cmp,
+  code3_hash,
+};
+
+
+/* Hash tables built by init_case_fold_table() from the static fold/unfold
+   arrays in this file. */
+static st_table* FoldTable;    /* fold-1, fold-2, fold-3 */
+static st_table* Unfold1Table; /* single code point    -> unfold list */
+static st_table* Unfold2Table; /* two-code-point key   -> unfold list */
+static st_table* Unfold3Table; /* three-code-point key -> unfold list */
+/* Set to 1 by init_case_fold_table() once all four tables are filled. */
+static int CaseFoldInited = 0;
+
+/*
+ * Build the four case-fold hash tables (FoldTable, Unfold1Table,
+ * Unfold2Table, Unfold3Table) from the static CaseFold / CaseUnfold arrays,
+ * including their *_Locale companions.
+ *
+ * Returns 0 on success (including when the tables were already built) or
+ * ONIGERR_MEMORY when a table cannot be allocated.  Every exit path passes
+ * through THREAD_ATOMIC_END so the section opened by THREAD_ATOMIC_START is
+ * always closed.
+ *
+ * NOTE(review): tables allocated before a failing allocation are not
+ * released here; CaseFoldInited stays 0, so a later call starts over.
+ */
+static int init_case_fold_table(void)
+{
+  const CaseFold_11_Type   *p;
+  const CaseUnfold_11_Type *p1;
+  const CaseUnfold_12_Type *p2;
+  const CaseUnfold_13_Type *p3;
+  int i;
+  int r = 0;
+
+  THREAD_ATOMIC_START;
+
+  /* Callers test CaseFoldInited before entering the atomic section, so
+     re-check it here: another caller may have finished the tables already. */
+  if (CaseFoldInited != 0) goto end;
+
+  /* Until everything is built, the only failure mode is allocation. */
+  r = ONIGERR_MEMORY;
+
+  FoldTable = st_init_numtable_with_size(1200);
+  if (ONIG_IS_NULL(FoldTable)) goto end;
+  for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) {
+    p = &CaseFold[i];
+    st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
+  }
+  for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type));
+       i++) {
+    p = &CaseFold_Locale[i];
+    st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
+  }
+
+  Unfold1Table = st_init_numtable_with_size(1000);
+  if (ONIG_IS_NULL(Unfold1Table)) goto end;
+
+  for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type));
+       i++) {
+    p1 = &CaseUnfold_11[i];
+    st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
+  }
+  for (i = 0;
+       i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type));
+       i++) {
+    p1 = &CaseUnfold_11_Locale[i];
+    st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
+  }
+
+  /* Keys of the next two tables are pointers to 2- and 3-element code
+     point arrays, compared and hashed through the custom hash types. */
+  Unfold2Table = st_init_table_with_size(&type_code2_hash, 200);
+  if (ONIG_IS_NULL(Unfold2Table)) goto end;
+
+  for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type));
+       i++) {
+    p2 = &CaseUnfold_12[i];
+    st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
+  }
+  for (i = 0;
+       i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type));
+       i++) {
+    p2 = &CaseUnfold_12_Locale[i];
+    st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
+  }
+
+  Unfold3Table = st_init_table_with_size(&type_code3_hash, 30);
+  if (ONIG_IS_NULL(Unfold3Table)) goto end;
+
+  for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type));
+       i++) {
+    p3 = &CaseUnfold_13[i];
+    st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
+  }
+
+  CaseFoldInited = 1;
+  r = 0;
+
+ end:
+  THREAD_ATOMIC_END;
+  return r;
+}
+
+/*
+ * Case-fold the character at *pp and write the folded bytes to `fold`.
+ *
+ * enc:  encoding used to decode the input and encode the result.
+ * flag: only consulted when USE_UNICODE_CASE_FOLD_TURKISH_AZERI is
+ *       compiled in (U+0049 -> U+0131 and U+0130 -> U+0069 when the
+ *       Turkish/Azeri bit is set).
+ * pp:   in/out; advanced past the character that was consumed.
+ * end:  end of the input, passed to the decoder.
+ * fold: output buffer.
+ *
+ * Returns the number of bytes written to `fold`.  A character with no
+ * FoldTable entry is copied through unchanged.
+ *
+ * NOTE(review): the result of init_case_fold_table() is not checked here.
+ */
+extern int
+onigenc_unicode_mbc_case_fold(OnigEncoding enc,
+    OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
+    UChar* fold)
+{
+  CodePointList3 *to;
+  OnigCodePoint code;
+  int i, len, rlen, w;
+  const UChar *p = *pp;
+
+  if (CaseFoldInited == 0) init_case_fold_table();
+
+  code = ONIGENC_MBC_TO_CODE(enc, p, end);
+  len = enclen(enc, p);
+  *pp += len;
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+    switch (code) {
+    case 0x0049:
+      return ONIGENC_CODE_TO_MBC(enc, 0x0131, fold);
+    case 0x0130:
+      return ONIGENC_CODE_TO_MBC(enc, 0x0069, fold);
+    default:
+      break;
+    }
+  }
+#endif
+
+  if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) == 0) {
+    /* No fold entry: pass the original bytes through. */
+    for (i = 0; i < len; i++) {
+      fold[i] = p[i];
+    }
+    return len;
+  }
+
+  if (to->n == 1) {
+    return ONIGENC_CODE_TO_MBC(enc, to->code[0], fold);
+  }
+
+  /* Multi-code-point fold: encode each code point back to back. */
+  rlen = 0;
+  for (i = 0; i < to->n; i++) {
+    w = ONIGENC_CODE_TO_MBC(enc, to->code[i], fold);
+    fold += w;
+    rlen += w;
+  }
+  return rlen;
+}
+
+/*
+ * Enumerate case-fold relations by calling `f` for each of them.
+ *
+ * flag: ONIGENC_CASE_FOLD_TURKISH_AZERI (when compiled in) replaces the
+ *       *_Locale tables with the Turkic i/I pairs;
+ *       INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR additionally enumerates the
+ *       two- and three-code-point tables.
+ * f:    callback, invoked as f(from, to_codes, to_len, arg).
+ * arg:  passed through to `f` unchanged.
+ *
+ * Returns 0 after a complete enumeration, or the first non-zero value
+ * returned by `f` (enumeration stops there).
+ *
+ * The function iterates the static arrays directly; it does not use the
+ * hash tables (the init call below is commented out).
+ */
+extern int
+onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
+                                   OnigApplyAllCaseFoldFunc f, void* arg)
+{
+  const CaseUnfold_11_Type* p11;
+  OnigCodePoint code;
+  int i, j, k, r;
+
+  /* if (CaseFoldInited == 0) init_case_fold_table(); */
+
+  /* Single code points: report to[j] <-> from in both directions, plus
+     every pair of distinct `to` entries in both directions. */
+  for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type));
+       i++) {
+    p11 = &CaseUnfold_11[i];
+    for (j = 0; j < p11->to.n; j++) {
+      code = p11->from;
+      r = (*f)(p11->to.code[j], &code, 1, arg);
+      if (r != 0) return r;
+
+      code = p11->to.code[j];
+      r = (*f)(p11->from, &code, 1, arg);
+      if (r != 0) return r;
+
+      for (k = 0; k < j; k++) {
+       r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]), 1, arg);
+       if (r != 0) return r;
+
+       r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]), 1, arg);
+       if (r != 0) return r;
+      }
+    }
+  }
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+    /* Turkic pairs: U+0049 <-> U+0131 and U+0069 <-> U+0130. */
+    code = 0x0131;
+    r = (*f)(0x0049, &code, 1, arg);
+    if (r != 0) return r;
+    code = 0x0049;
+    r = (*f)(0x0131, &code, 1, arg);
+    if (r != 0) return r;
+
+    code = 0x0130;
+    r = (*f)(0x0069, &code, 1, arg);
+    if (r != 0) return r;
+    code = 0x0069;
+    r = (*f)(0x0130, &code, 1, arg);
+    if (r != 0) return r;
+  }
+  else {
+#endif
+    /* Same enumeration as above, for the CaseUnfold_11_Locale entries. */
+    for (i = 0;
+        i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type));
+        i++) {
+      p11 = &CaseUnfold_11_Locale[i];
+      for (j = 0; j < p11->to.n; j++) {
+       code = p11->from;
+       r = (*f)(p11->to.code[j], &code, 1, arg);
+       if (r != 0) return r;
+
+       code = p11->to.code[j];
+       r = (*f)(p11->from, &code, 1, arg);
+       if (r != 0) return r;
+
+       for (k = 0; k < j; k++) {
+         r = (*f)(p11->to.code[j], (OnigCodePoint* )(&p11->to.code[k]),
+                  1, arg);
+         if (r != 0) return r;
+
+         r = (*f)(p11->to.code[k], (OnigCodePoint* )(&p11->to.code[j]),
+                  1, arg);
+         if (r != 0) return r;
+       }
+      }
+    }
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  }
+#endif
+
+  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    /* Two-code-point sequences: report to[j] -> from (length 2) and
+       to[j] -> every other to[k] (length 1). */
+    for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type));
+        i++) {
+      for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
+       r = (*f)(CaseUnfold_12[i].to.code[j],
+                (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
+       if (r != 0) return r;   
+
+       for (k = 0; k < CaseUnfold_12[i].to.n; k++) {
+         if (k == j) continue;
+
+         r = (*f)(CaseUnfold_12[i].to.code[j],
+                  (OnigCodePoint* )(&CaseUnfold_12[i].to.code[k]), 1, arg);
+         if (r != 0) return r;
+       }
+      }
+    }
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
+#endif
+      /* CaseUnfold_12_Locale is skipped when the Turkic flag is set. */
+      for (i = 0;
+          i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type));
+          i++) {
+       for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
+         r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
+                  (OnigCodePoint* )CaseUnfold_12_Locale[i].from, 2, arg);
+         if (r != 0) return r; 
+
+         for (k = 0; k < CaseUnfold_12_Locale[i].to.n; k++) {
+           if (k == j) continue;
+
+           r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
+                    (OnigCodePoint* )(&CaseUnfold_12_Locale[i].to.code[k]),
+                    1, arg);
+           if (r != 0) return r;
+         }
+       }
+      }
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    }
+#endif
+
+    /* Three-code-point sequences, same scheme with length 3. */
+    for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type));
+        i++) {
+      for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
+       r = (*f)(CaseUnfold_13[i].to.code[j],
+                (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
+       if (r != 0) return r;   
+
+       for (k = 0; k < CaseUnfold_13[i].to.n; k++) {
+         if (k == j) continue;
+
+         r = (*f)(CaseUnfold_13[i].to.code[j],
+                  (OnigCodePoint* )(&CaseUnfold_13[i].to.code[k]), 1, arg);
+         if (r != 0) return r;
+       }
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Collect the case-fold alternatives for the text starting at `p`.
+ *
+ * enc:   encoding used to decode the input.
+ * flag:  ONIGENC_CASE_FOLD_TURKISH_AZERI (when compiled in) selects the
+ *        Turkic i/I mappings; INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR enables
+ *        multi-code-point alternatives.
+ * p,end: input range; up to three characters starting at `p` are examined.
+ * items: output array.  Each item records byte_len (input bytes the
+ *        alternative stands for), code_len and code[] (the alternative).
+ *
+ * Returns the number of items written.
+ *
+ * NOTE(review): no capacity check on items[] is visible here, and the result
+ * of init_case_fold_table() is not checked - confirm against the callers.
+ */
+extern int
+onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
+    OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end,
+    OnigCaseFoldCodeItem items[])
+{
+  int n, i, j, k, len;
+  OnigCodePoint code, codes[3];
+  CodePointList3 *to, *z3;
+  CodePointList2 *z2;
+
+  if (CaseFoldInited == 0) init_case_fold_table();
+
+  n = 0;
+
+  code = ONIGENC_MBC_TO_CODE(enc, p, end);
+  len = enclen(enc, p);
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+  /* Turkic mode: the four i/I code points each map to exactly one partner. */
+  if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+    if (code == 0x0049) {
+      items[0].byte_len = len;
+      items[0].code_len = 1;
+      items[0].code[0]  = 0x0131;
+      return 1;
+    }
+    else if (code == 0x0130) {
+      items[0].byte_len = len;
+      items[0].code_len = 1;
+      items[0].code[0]  = 0x0069;
+      return 1;
+    }
+    else if (code == 0x0131) {
+      items[0].byte_len = len;
+      items[0].code_len = 1;
+      items[0].code[0]  = 0x0049;
+      return 1;
+    }
+    else if (code == 0x0069) {
+      items[0].byte_len = len;
+      items[0].code_len = 1;
+      items[0].code[0]  = 0x0130;
+      return 1;
+    }
+  }
+#endif
+
+  if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0) {
+    if (to->n == 1) {
+      /* Folds to a single code point: report it, then every other code
+         point that unfolds from it (excluding the original). */
+      OnigCodePoint orig_code = code;
+
+      items[0].byte_len = len;
+      items[0].code_len = 1;
+      items[0].code[0]  = to->code[0];
+      n++;
+
+      /* From here on `code` holds the folded value. */
+      code = to->code[0];
+      if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
+       for (i = 0; i < to->n; i++) {
+         if (to->code[i] != orig_code) {
+           items[n].byte_len = len;
+           items[n].code_len = 1;
+           items[n].code[0]  = to->code[i];
+           n++;
+         }
+       }
+      }
+    }
+    else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+      /* Folds to 2 or 3 code points.  cs[fn] holds the fn-th folded code
+         point followed by its single-code-point unfoldings; ncs[fn] is the
+         number of valid entries in cs[fn]. */
+      OnigCodePoint cs[3][4];
+      int fn, ncs[3];
+
+      for (fn = 0; fn < to->n; fn++) {
+       cs[fn][0] = to->code[fn];
+       if (onig_st_lookup(Unfold1Table, (st_data_t )cs[fn][0],
+                          (void* )&z3) != 0) {
+         for (i = 0; i < z3->n; i++) {
+           cs[fn][i+1] = z3->code[i];
+         }
+         ncs[fn] = z3->n + 1;
+       }
+       else
+         ncs[fn] = 1;
+      }
+
+      if (fn == 2) {
+       /* Every combination of the two positions. */
+       for (i = 0; i < ncs[0]; i++) {
+         for (j = 0; j < ncs[1]; j++) {
+           items[n].byte_len = len;
+           items[n].code_len = 2;
+           items[n].code[0]  = cs[0][i];
+           items[n].code[1]  = cs[1][j];
+           n++;
+         }
+       }
+
+       /* Other single code points that unfold from the same sequence. */
+       if (onig_st_lookup(Unfold2Table, (st_data_t )to->code,
+                          (void* )&z2) != 0) {
+         for (i = 0; i < z2->n; i++) {
+           if (z2->code[i] == code) continue;
+
+           items[n].byte_len = len;
+           items[n].code_len = 1;
+           items[n].code[0]  = z2->code[i];
+           n++;
+         }
+       }
+      }
+      else {
+       /* Every combination of the three positions. */
+       for (i = 0; i < ncs[0]; i++) {
+         for (j = 0; j < ncs[1]; j++) {
+           for (k = 0; k < ncs[2]; k++) {
+             items[n].byte_len = len;
+             items[n].code_len = 3;
+             items[n].code[0]  = cs[0][i];
+             items[n].code[1]  = cs[1][j];
+             items[n].code[2]  = cs[2][k];
+             n++;
+           }
+         }
+       }
+
+       if (onig_st_lookup(Unfold3Table, (st_data_t )to->code,
+                          (void* )&z2) != 0) {
+         for (i = 0; i < z2->n; i++) {
+           if (z2->code[i] == code) continue;
+
+           items[n].byte_len = len;
+           items[n].code_len = 1;
+           items[n].code[0]  = z2->code[i];
+           n++;
+         }
+       }
+      }
+
+      /* multi char folded code is not head of another folded multi char */
+      flag = 0; /* DISABLE_CASE_FOLD_MULTI_CHAR(flag); */
+    }
+  }
+  else {
+    /* Not foldable itself: report whatever unfolds from it. */
+    if (onig_st_lookup(Unfold1Table, (st_data_t )code, (void* )&to) != 0) {
+      for (i = 0; i < to->n; i++) {
+       items[n].byte_len = len;
+       items[n].code_len = 1;
+       items[n].code[0]  = to->code[i];
+       n++;
+      }
+    }
+  }
+
+
+  /* Look ahead: the first character (folded, if it folded to one code
+     point) together with the next one or two folded characters may be a
+     sequence that a single code point unfolds from. */
+  if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+    p += len;
+    if (p < end) {
+      int clen;
+
+      codes[0] = code;
+      code = ONIGENC_MBC_TO_CODE(enc, p, end);
+      if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
+         && to->n == 1) {
+       codes[1] = to->code[0];
+      }
+      else
+       codes[1] = code;
+
+      clen = enclen(enc, p);
+      /* byte_len of these items covers both input characters. */
+      len += clen;
+      if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
+       for (i = 0; i < z2->n; i++) {
+         items[n].byte_len = len;
+         items[n].code_len = 1;
+         items[n].code[0]  = z2->code[i];
+         n++;
+       }
+      }
+
+      p += clen;
+      if (p < end) {
+       code = ONIGENC_MBC_TO_CODE(enc, p, end);
+       if (onig_st_lookup(FoldTable, (st_data_t )code, (void* )&to) != 0
+           && to->n == 1) {
+         codes[2] = to->code[0];
+       }
+       else
+         codes[2] = code;
+
+       clen = enclen(enc, p);
+       /* byte_len now covers all three input characters. */
+       len += clen;
+       if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
+                          (void* )&z2) != 0) {
+         for (i = 0; i < z2->n; i++) {
+           items[n].byte_len = len;
+           items[n].code_len = 1;
+           items[n].code[0]  = z2->code[i];
+           n++;
+         }
+       }
+      }
+    }
+  }
+
+  return n;
 }
index 6ab80a6c1cea82ea5ed2bcc1135c45c5b4e9310e..1e909ebbf29307414503e49ba714dccf52e57e20 100755 (executable)
@@ -2,7 +2,7 @@
   utf16_be.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,9 +29,6 @@
 
 #include "regenc.h"
 
-#define UTF16_IS_SURROGATE_FIRST(c)    (c >= 0xd8 && c <= 0xdb)
-#define UTF16_IS_SURROGATE_SECOND(c)   (c >= 0xdc && c <= 0xdf)
-
 static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -64,7 +61,11 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
     if (*(p+1) == 0x0a && *p == 0x00)
       return 1;
 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-    if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00)
+    if ((
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+        *(p+1) == 0x0d ||
+#endif
+        *(p+1) == 0x85) && *p == 0x00)
       return 1;
     if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
       return 1;
@@ -74,7 +75,7 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
 }
 
 static OnigCodePoint
-utf16be_mbc_to_code(const UChar* p, const UChar* end)
+utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
 {
   OnigCodePoint code;
 
@@ -103,11 +104,11 @@ utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
   if (code > 0xffff) {
     unsigned int plane, high;
 
-    plane = code >> 16;
+    plane = (code >> 16) - 1;
     *p++ = (plane >> 2) + 0xd8;
     high = (code & 0xff00) >> 8;
     *p++ = ((plane & 0x03) << 6) + (high >> 2);
-    *p++ = (high & 0x02) + 0xdc;
+    *p++ = (high & 0x03) + 0xdc;
     *p   = (UChar )(code & 0xff);
     return 4;
   }
@@ -119,43 +120,37 @@ utf16be_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                         UChar* lower)
+utf16be_mbc_case_fold(OnigCaseFoldType flag,  /* fold the char at *pp into fold[] */
+                     const UChar** pp, const UChar* end, UChar* fold)
 {
   const UChar* p = *pp;
 
-  if (*p == 0) {
+  if (ONIGENC_IS_ASCII_CODE(*(p+1)) && *p == 0) {  /* high byte 0, low byte ASCII */
     p++;
-    *lower++ = '\0';
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
-    }
-
-    (*pp) += 2;
-    return 2;  /* return byte length of converted char to lower */
-  }
-  else {
-    int len;
-    len = EncLen_UTF16[*p];
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+      if (*p == 0x49) {  /* 'I' */
+       *fold++ = 0x01;  /* bytes 0x01 0x31: U+0131 in big-endian order */
+       *fold   = 0x31;
+       (*pp) += 2;
+       return 2;
       }
     }
-    (*pp) += len;
-    return len; /* return byte length of converted char to lower */
+#endif
+
+    *fold++ = 0;  /* high byte */
+    *fold   = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);  /* lower-cased low byte */
+    *pp += 2;  /* consume the 2-byte unit */
+    return 2;  /* bytes written to fold[] */
   }
+  else  /* anything else is handled by the shared Unicode routine */
+    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_BE, flag,
+                                        pp, end, fold);
 }
 
+#if 0
 static int
-utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
@@ -165,27 +160,27 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
     int c, v;
 
     p++;
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      c = *p;
-      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
-             (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-
-      if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-        if (c >= 0xaa && c <= 0xba)
-          return FALSE;
-        else
-          return TRUE;
-      }
-      return (v != 0 ? TRUE : FALSE);
+    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+      return TRUE;
+    }
+
+    c = *p;
+    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
+               (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+
+    if ((v | BIT_CTYPE_LOWER) != 0) {
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+       return FALSE;
+      else
+       return TRUE;
     }
+    return (v != 0 ? TRUE : FALSE);
   }
 
   return FALSE;
 }
+#endif
 
 static UChar*
 utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
@@ -202,31 +197,29 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
   return (UChar* )s;
 }
 
+static int  /* result of the shared Unicode lookup, returned unchanged */
+utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{  /* supplies ONIG_ENCODING_UTF16_BE; flag, p, end and items are forwarded as-is */
+  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_BE,
+                                                   flag, p, end, items);
+}
+
 OnigEncodingType OnigEncodingUTF16_BE = {
   utf16be_mbc_enc_len,
   "UTF-16BE",   /* name */
   4,            /* max byte length */
   2,            /* min byte length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   utf16be_is_mbc_newline,
   utf16be_mbc_to_code,
   utf16be_code_to_mbclen,
   utf16be_code_to_mbc,
-  utf16be_mbc_to_normalize,
-  utf16be_is_mbc_ambiguous,
-  onigenc_iso_8859_1_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  utf16be_mbc_case_fold,
+  onigenc_unicode_apply_all_case_fold,
+  utf16be_get_case_fold_codes_by_str,
+  onigenc_unicode_property_name_to_ctype,
   onigenc_unicode_is_code_ctype,
-  onigenc_unicode_get_ctype_code_range,
+  onigenc_utf16_32_get_ctype_code_range,
   utf16be_left_adjust_char_head,
   onigenc_always_false_is_allowed_reverse_match
 };
index 2248e4910fd9ef3c3824fb3223c0e5de61402d6f..5cc07591173a01fdbe6316cea2495bd815fd0937 100755 (executable)
@@ -2,7 +2,7 @@
   utf16_le.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,9 +29,6 @@
 
 #include "regenc.h"
 
-#define UTF16_IS_SURROGATE_FIRST(c)    (c >= 0xd8 && c <= 0xdb)
-#define UTF16_IS_SURROGATE_SECOND(c)   (c >= 0xdc && c <= 0xdf)
-
 static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -70,7 +67,11 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
     if (*p == 0x0a && *(p+1) == 0x00)
       return 1;
 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-    if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00)
+    if ((
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+        *p == 0x0d ||
+#endif
+        *p == 0x85) && *(p+1) == 0x00)
       return 1;
     if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
       return 1;
@@ -80,7 +81,7 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
 }
 
 static OnigCodePoint
-utf16le_mbc_to_code(const UChar* p, const UChar* end)
+utf16le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
 {
   OnigCodePoint code;
   UChar c0 = *p;
@@ -105,13 +106,13 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
   if (code > 0xffff) {
     unsigned int plane, high;
 
-    plane = code >> 16;
+    plane = (code >> 16) - 1;
     high = (code & 0xff00) >> 8;
 
     *p++ = ((plane & 0x03) << 6) + (high >> 2);
     *p++ = (plane >> 2) + 0xd8;
     *p++ = (UChar )(code & 0xff);
-    *p   = (high & 0x02) + 0xdc;
+    *p   = (high & 0x03) + 0xdc;
     return 4;
   }
   else {
@@ -122,40 +123,37 @@ utf16le_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                         UChar* lower)
+utf16le_mbc_case_fold(OnigCaseFoldType flag,  /* fold the char at *pp into fold[] */
+                     const UChar** pp, const UChar* end, UChar* fold)
 {
   const UChar* p = *pp;
 
-  if (*(p+1) == 0) {
-    *(lower+1) = '\0';
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
-    }
-    (*pp) += 2;
-    return 2;  /* return byte length of converted char to lower */
-  }
-  else {
-    int len = EncLen_UTF16[*(p+1)];
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
+  if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0) {  /* low byte ASCII, high byte 0 */
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+      if (*p == 0x49) {  /* 'I' */
+       *fold++ = 0x31;  /* bytes 0x31 0x01: U+0131 in little-endian order */
+       *fold   = 0x01;
+       (*pp) += 2;
+       return 2;
       }
     }
-    (*pp) += len;
-    return len; /* return byte length of converted char to lower */
+#endif
+
+    *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);  /* lower-cased low byte */
+    *fold   = 0;  /* high byte */
+    *pp += 2;  /* consume the 2-byte unit */
+    return 2;  /* bytes written to fold[] */
   }
+  else  /* anything else is handled by the shared Unicode routine */
+    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF16_LE, flag, pp, end,
+                                        fold);
 }
 
+#if 0
 static int
-utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
+                        const UChar* end)
 {
   const UChar* p = *pp;
 
@@ -164,26 +162,26 @@ utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
   if (*(p+1) == 0) {
     int c, v;
 
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      c = *p;
-      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
-                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-      if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-        if (c >= 0xaa && c <= 0xba)
-          return FALSE;
-        else
-          return TRUE;
-      }
-      return (v != 0 ? TRUE : FALSE);
+    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+      return TRUE;
     }
+
+    c = *p;
+    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
+                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+    if ((v | BIT_CTYPE_LOWER) != 0) {
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+       return FALSE;
+      else
+       return TRUE;
+    }
+    return (v != 0 ? TRUE : FALSE);
   }
 
   return FALSE;
 }
+#endif
 
 static UChar*
 utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
@@ -200,31 +198,29 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
   return (UChar* )s;
 }
 
+static int  /* result of the shared Unicode lookup, returned unchanged */
+utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{  /* supplies ONIG_ENCODING_UTF16_LE; flag, p, end and items are forwarded as-is */
+  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF16_LE,
+                                                   flag, p, end, items);
+}
+
 OnigEncodingType OnigEncodingUTF16_LE = {
   utf16le_mbc_enc_len,
   "UTF-16LE",   /* name */
   4,            /* max byte length */
   2,            /* min byte length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   utf16le_is_mbc_newline,
   utf16le_mbc_to_code,
   utf16le_code_to_mbclen,
   utf16le_code_to_mbc,
-  utf16le_mbc_to_normalize,
-  utf16le_is_mbc_ambiguous,
-  onigenc_iso_8859_1_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  utf16le_mbc_case_fold,
+  onigenc_unicode_apply_all_case_fold,
+  utf16le_get_case_fold_codes_by_str,
+  onigenc_unicode_property_name_to_ctype,
   onigenc_unicode_is_code_ctype,
-  onigenc_unicode_get_ctype_code_range,
+  onigenc_utf16_32_get_ctype_code_range,
   utf16le_left_adjust_char_head,
   onigenc_always_false_is_allowed_reverse_match
 };
index 75133ca2626e1f518b0d59103d04ab071a52834d..b4f822607c89467d98e697c8c0610cd4481c12fc 100755 (executable)
@@ -2,7 +2,7 @@
   utf32_be.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,7 @@
 #include "regenc.h"
 
 static int
-utf32be_mbc_enc_len(const UChar* p)
+utf32be_mbc_enc_len(const UChar* p ARG_UNUSED)
 {
   return 4;
 }
@@ -42,7 +42,11 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
     if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
       return 1;
 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-    if ((*(p+3) == 0x0d || *(p+3) == 0x85)
+    if ((
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+        *(p+3) == 0x0d ||
+#endif
+        *(p+3) == 0x85)
        && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
       return 1;
     if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
@@ -54,13 +58,13 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
 }
 
 static OnigCodePoint
-utf32be_mbc_to_code(const UChar* p, const UChar* end)
+utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
 {
   return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
 }
 
 static int
-utf32be_code_to_mbclen(OnigCodePoint code)
+utf32be_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
 {
   return 4;
 }
@@ -78,44 +82,39 @@ utf32be_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                         UChar* lower)
+utf32be_mbc_case_fold(OnigCaseFoldType flag,  /* fold the char at *pp into fold[] */
+                     const UChar** pp, const UChar* end, UChar* fold)
 {
   const UChar* p = *pp;
 
-  if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
-    p += 3;
-    *lower++ = '\0';
-    *lower++ = '\0';
-    *lower++ = '\0';
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      *lower = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
-    }
-
-    (*pp) += 4;
-    return 4;  /* return byte length of converted char to lower */
-  }
-  else {
-    int len = 4;
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
+  if (ONIGENC_IS_ASCII_CODE(*(p+3)) && *(p+2) == 0 && *(p+1) == 0 && *p == 0) {
+    *fold++ = 0;  /* the two leading bytes are zero on both paths below */
+    *fold++ = 0;
+
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+      if (*(p+3) == 0x49) {  /* 'I' */
+       *fold++ = 0x01;  /* trailing bytes 0x01 0x31: U+0131, big-endian */
+       *fold   = 0x31;
+       (*pp) += 4;
+       return 4;
       }
     }
-    (*pp) += len;
-    return len; /* return byte length of converted char to lower */
+#endif
+
+    *fold++ = 0;  /* third byte */
+    *fold   = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*(p+3));  /* lower-cased last byte */
+    *pp += 4;  /* consume the 4-byte unit */
+    return 4;  /* bytes written to fold[] */
   }
+  else  /* anything else is handled by the shared Unicode routine */
+    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_BE, flag, pp, end,
+                                        fold);
 }
 
+#if 0
 static int
-utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
@@ -125,26 +124,26 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
     int c, v;
 
     p += 3;
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      c = *p;
-      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
-                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-      if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-        if (c >= 0xaa && c <= 0xba)
-          return FALSE;
-        else
-          return TRUE;
-      }
-      return (v != 0 ? TRUE : FALSE);
+    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+      return TRUE;
+    }
+
+    c = *p;
+    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
+                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+    if ((v | BIT_CTYPE_LOWER) != 0) {
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+       return FALSE;
+      else
+       return TRUE;
     }
+    return (v != 0 ? TRUE : FALSE);
   }
 
   return FALSE;
 }
+#endif
 
 static UChar*
 utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
@@ -157,31 +156,29 @@ utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
   return (UChar* )(s - rem);
 }
 
+static int  /* result of the shared Unicode lookup, returned unchanged */
+utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{  /* supplies ONIG_ENCODING_UTF32_BE; flag, p, end and items are forwarded as-is */
+  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_BE,
+                                                   flag, p, end, items);
+}
+
 OnigEncodingType OnigEncodingUTF32_BE = {
   utf32be_mbc_enc_len,
   "UTF-32BE",   /* name */
   4,            /* max byte length */
   4,            /* min byte length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   utf32be_is_mbc_newline,
   utf32be_mbc_to_code,
   utf32be_code_to_mbclen,
   utf32be_code_to_mbc,
-  utf32be_mbc_to_normalize,
-  utf32be_is_mbc_ambiguous,
-  onigenc_iso_8859_1_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  utf32be_mbc_case_fold,
+  onigenc_unicode_apply_all_case_fold,
+  utf32be_get_case_fold_codes_by_str,
+  onigenc_unicode_property_name_to_ctype,
   onigenc_unicode_is_code_ctype,
-  onigenc_unicode_get_ctype_code_range,
+  onigenc_utf16_32_get_ctype_code_range,
   utf32be_left_adjust_char_head,
   onigenc_always_false_is_allowed_reverse_match
 };
index 21dca10c115d21a43e7cbf02771bc7342ee2b681..8f413bfc74e12b1cfc8cabb8d3c284dcb67681cd 100755 (executable)
@@ -2,7 +2,7 @@
   utf32_le.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -30,7 +30,7 @@
 #include "regenc.h"
 
 static int
-utf32le_mbc_enc_len(const UChar* p)
+utf32le_mbc_enc_len(const UChar* p ARG_UNUSED)
 {
   return 4;
 }
@@ -42,8 +42,12 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
     if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
       return 1;
 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-    if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00
-       && (p+2) == 0x00 && *(p+3) == 0x00)
+    if ((  /* low byte is CR (skipped in CRNL mode) or 0x85 ... */
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
+        *p == 0x0d ||
+#endif
+        *p == 0x85)  /* ... and all three upper bytes are zero (test the byte at p+2, not the pointer) */
+       && *(p+1) == 0x00 && *(p+2) == 0x00 && *(p+3) == 0x00)
       return 1;
     if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
        && *(p+2) == 0x00 && *(p+3) == 0x00)
@@ -54,13 +58,13 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
 }
 
 static OnigCodePoint
-utf32le_mbc_to_code(const UChar* p, const UChar* end)
+utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
 {
   return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
 }
 
 static int
-utf32le_code_to_mbclen(OnigCodePoint code)
+utf32le_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
 {
   return 4;
 }
@@ -78,43 +82,40 @@ utf32le_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
-                         UChar* lower)
+utf32le_mbc_case_fold(OnigCaseFoldType flag,
+                     const UChar** pp, const UChar* end, UChar* fold)
 {
   const UChar* p = *pp;
 
-  if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      *lower++ = ONIGENC_ISO_8859_1_TO_LOWER_CASE(*p);
+  if (ONIGENC_IS_ASCII_CODE(*p) && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+      if (*p == 0x49) {
+       *fold++ = 0x31;
+       *fold++ = 0x01;
+      }
     }
     else {
-      *lower++ = *p;
+#endif
+      *fold++ = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
+      *fold++ = 0;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
     }
-    *lower++ = '\0';
-    *lower++ = '\0';
-    *lower   = '\0';
+#endif
 
-    (*pp) += 4;
-    return 4;  /* return byte length of converted char to lower */
-  }
-  else {
-    int len = 4;
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
-      }
-    }
-    (*pp) += len;
-    return len; /* return byte length of converted char to lower */
+    *fold++ = 0;
+    *fold   = 0;
+    *pp += 4;
+    return 4;
   }
+  else
+    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF32_LE, flag, pp, end,
+                                        fold);
 }
 
+#if 0
 static int
-utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
@@ -123,26 +124,26 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
   if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
     int c, v;
 
-    if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        ONIGENC_IS_MBC_ASCII(p)) ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
-        !ONIGENC_IS_MBC_ASCII(p))) {
-      c = *p;
-      v = ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(c,
-                       (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER));
-      if ((v | ONIGENC_CTYPE_LOWER) != 0) {
-        /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
-        if (c >= 0xaa && c <= 0xba)
-          return FALSE;
-        else
-          return TRUE;
-      }
-      return (v != 0 ? TRUE : FALSE);
+    if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+      return TRUE;
+    }
+
+    c = *p;
+    v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
+                       (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
+    if ((v | BIT_CTYPE_LOWER) != 0) {
+      /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
+      if (c >= 0xaa && c <= 0xba)
+       return FALSE;
+      else
+       return TRUE;
     }
+    return (v != 0 ? TRUE : FALSE);
   }
 
   return FALSE;
 }
+#endif
 
 static UChar*
 utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
@@ -155,31 +156,29 @@ utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
   return (UChar* )(s - rem);
 }
 
+static int  /* result of the shared Unicode lookup, returned unchanged */
+utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{  /* supplies ONIG_ENCODING_UTF32_LE; flag, p, end and items are forwarded as-is */
+  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF32_LE,
+                                                   flag, p, end, items);
+}
+
 OnigEncodingType OnigEncodingUTF32_LE = {
   utf32le_mbc_enc_len,
   "UTF-32LE",   /* name */
   4,            /* max byte length */
   4,            /* min byte length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
   utf32le_is_mbc_newline,
   utf32le_mbc_to_code,
   utf32le_code_to_mbclen,
   utf32le_code_to_mbc,
-  utf32le_mbc_to_normalize,
-  utf32le_is_mbc_ambiguous,
-  onigenc_iso_8859_1_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
+  utf32le_mbc_case_fold,
+  onigenc_unicode_apply_all_case_fold,
+  utf32le_get_case_fold_codes_by_str,
+  onigenc_unicode_property_name_to_ctype,
   onigenc_unicode_is_code_ctype,
-  onigenc_unicode_get_ctype_code_range,
+  onigenc_utf16_32_get_ctype_code_range,
   utf32le_left_adjust_char_head,
   onigenc_always_false_is_allowed_reverse_match
 };
index c7481d7050ae9d659bfa8be3331ccffa63bdae60..5e2c1721aa1cc4e0414d28cc248e481bc8ba59d1 100644 (file)
@@ -2,7 +2,7 @@
   utf8.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -60,19 +60,21 @@ static const int EncLen_UTF8[] = {
 };
 
 static int
-utf8_mbc_enc_len(const UChar* p)
+mbc_enc_len(const UChar* p)
 {
   return EncLen_UTF8[*p];
 }
 
 static int
-utf8_is_mbc_newline(const UChar* p, const UChar* end)
+is_mbc_newline(const UChar* p, const UChar* end)
 {
   if (p < end) {
     if (*p == 0x0a) return 1;
 
 #ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+#ifndef USE_CRNL_AS_LINE_TERMINATOR
     if (*p == 0x0d) return 1;
+#endif
     if (p + 1 < end) {
       if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
        return 1;
@@ -89,12 +91,12 @@ utf8_is_mbc_newline(const UChar* p, const UChar* end)
 }
 
 static OnigCodePoint
-utf8_mbc_to_code(const UChar* p, const UChar* end)
+mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
 {
   int c, len;
   OnigCodePoint n;
 
-  len = enc_len(ONIG_ENCODING_UTF8, p);
+  len = enclen(ONIG_ENCODING_UTF8, p);
   c = *p++;
   if (len > 1) {
     len--;
@@ -116,14 +118,10 @@ utf8_mbc_to_code(const UChar* p, const UChar* end)
 }
 
 static int
-utf8_code_to_mbclen(OnigCodePoint code)
+code_to_mbclen(OnigCodePoint code)
 {
   if      ((code & 0xffffff80) == 0) return 1;
-  else if ((code & 0xfffff800) == 0) {
-    if (code <= 0xff && code >= 0xfe)
-      return 1;
-    return 2;
-  }
+  else if ((code & 0xfffff800) == 0) return 2;
   else if ((code & 0xffff0000) == 0) return 3;
   else if ((code & 0xffe00000) == 0) return 4;
   else if ((code & 0xfc000000) == 0) return 5;
@@ -133,35 +131,11 @@ utf8_code_to_mbclen(OnigCodePoint code)
   else if (code == INVALID_CODE_FF) return 1;
 #endif
   else
-    return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
-}
-
-#if 0
-static int
-utf8_code_to_mbc_first(OnigCodePoint code)
-{
-  if ((code & 0xffffff80) == 0)
-    return code;
-  else {
-    if ((code & 0xfffff800) == 0)
-      return ((code>>6)& 0x1f) | 0xc0;
-    else if ((code & 0xffff0000) == 0)
-      return ((code>>12) & 0x0f) | 0xe0;
-    else if ((code & 0xffe00000) == 0)
-      return ((code>>18) & 0x07) | 0xf0;
-    else if ((code & 0xfc000000) == 0)
-      return ((code>>24) & 0x03) | 0xf8;
-    else if ((code & 0x80000000) == 0)
-      return ((code>>30) & 0x01) | 0xfc;
-    else {
-      return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
-    }
-  }
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
 }
-#endif
 
 static int
-utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
+code_to_mbc(OnigCodePoint code, UChar *buf)
 {
 #define UTF8_TRAILS(code, shift) (UChar )((((code) >> (shift)) & 0x3f) | 0x80)
 #define UTF8_TRAIL0(code)        (UChar )(((code) & 0x3f) | 0x80)
@@ -209,7 +183,7 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
     }
 #endif
     else {
-      return ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE;
+      return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
     }
 
     *p++ = UTF8_TRAIL0(code);
@@ -218,76 +192,60 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 static int
-utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UChar* lower)
+mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,  /* fold the char at *pp into fold[] */
+             const UChar* end, UChar* fold)
 {
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
+#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
+    if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
+      if (*p == 0x49) {  /* 'I' */
+       *fold++ = 0xc4;  /* bytes 0xc4 0xb1: UTF-8 encoding of U+0131 */
+       *fold   = 0xb1;
+       (*pp)++;  /* one input byte consumed ... */
+       return 2;  /* ... but two bytes written to fold[] */
+      }
     }
+#endif
+
+    *fold = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);  /* single lower-cased byte */
     (*pp)++;
     return 1; /* return byte length of converted char to lower */
   }
   else {
-    int len;
-
-    if (*p == 195) { /* 195 == '\303' */
-      int c = *(p + 1);
-      if (c >= 128) {
-        if (c <= (UChar )'\236' &&  /* upper */
-            (flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
-          if (c != (UChar )'\227') {
-            *lower++ = *p;
-            *lower   = (UChar )(c + 32);
-            (*pp) += 2;
-            return 2;
-          }
-        }
-      }
-    }
-
-    len = enc_len(ONIG_ENCODING_UTF8, p);
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
-      }
-    }
-    (*pp) += len;
-    return len; /* return byte length of converted char to lower */
+    return onigenc_unicode_mbc_case_fold(ONIG_ENCODING_UTF8, flag,  /* non-ASCII lead byte */
+                                        pp, end, fold);
   }
 }
 
+#if 0
 static int
-utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
     (*pp)++;
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-      return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
-    }
+    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
   }
   else {
-    (*pp) += enc_len(ONIG_ENCODING_UTF8, p);
+    (*pp) += enclen(ONIG_ENCODING_UTF8, p);
 
-    if (*p == 195) { /* 195 == '\303' */
+    if (*p == 0xc3) {
       int c = *(p + 1);
-      if (c >= 128) {
-        if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
-          if (c <= (UChar )'\236') { /* upper */
-            if (c == (UChar )'\227') return FALSE;
-            return TRUE;
-          }
-          else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */
-            if (c == (UChar )'\267') return FALSE;
-            return TRUE;
-          }
+      if (c >= 0x80) {
+       if (c <= (UChar )0x9e) { /* upper */
+         if (c == (UChar )0x97) return FALSE;
+         return TRUE;
+       }
+       else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */
+         if (c == (UChar )'\267') return FALSE;
+         return TRUE;
+       }
+        else if (c == (UChar )0x9f &&
+                 (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
+         return TRUE;
         }
       }
     }
@@ -295,3401 +253,20 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 
   return FALSE;
 }
-
-
-static const OnigCodePoint EmptyRange[] = { 0 };
-
-static const OnigCodePoint SBAlnum[] = {
-  3,
-  0x0030, 0x0039,
-  0x0041, 0x005a,
-  0x0061, 0x007a
-};
-
-static const OnigCodePoint MBAlnum[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  411,
-#else
-  6,
-#endif
-  0x00aa, 0x00aa,
-  0x00b5, 0x00b5,
-  0x00ba, 0x00ba,
-  0x00c0, 0x00d6,
-  0x00d8, 0x00f6,
-  0x00f8, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x02c1,
-  0x02c6, 0x02d1,
-  0x02e0, 0x02e4,
-  0x02ee, 0x02ee,
-  0x0300, 0x0357,
-  0x035d, 0x036f,
-  0x037a, 0x037a,
-  0x0386, 0x0386,
-  0x0388, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x03a1,
-  0x03a3, 0x03ce,
-  0x03d0, 0x03f5,
-  0x03f7, 0x03fb,
-  0x0400, 0x0481,
-  0x0483, 0x0486,
-  0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
-  0x0500, 0x050f,
-  0x0531, 0x0556,
-  0x0559, 0x0559,
-  0x0561, 0x0587,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05bd,
-  0x05bf, 0x05bf,
-  0x05c1, 0x05c2,
-  0x05c4, 0x05c4,
-  0x05d0, 0x05ea,
-  0x05f0, 0x05f2,
-  0x0610, 0x0615,
-  0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x0660, 0x0669,
-  0x066e, 0x06d3,
-  0x06d5, 0x06dc,
-  0x06de, 0x06e8,
-  0x06ea, 0x06fc,
-  0x06ff, 0x06ff,
-  0x0710, 0x074a,
-  0x074d, 0x074f,
-  0x0780, 0x07b1,
-  0x0901, 0x0939,
-  0x093c, 0x094d,
-  0x0950, 0x0954,
-  0x0958, 0x0963,
-  0x0966, 0x096f,
-  0x0981, 0x0983,
-  0x0985, 0x098c,
-  0x098f, 0x0990,
-  0x0993, 0x09a8,
-  0x09aa, 0x09b0,
-  0x09b2, 0x09b2,
-  0x09b6, 0x09b9,
-  0x09bc, 0x09c4,
-  0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
-  0x09d7, 0x09d7,
-  0x09dc, 0x09dd,
-  0x09df, 0x09e3,
-  0x09e6, 0x09f1,
-  0x0a01, 0x0a03,
-  0x0a05, 0x0a0a,
-  0x0a0f, 0x0a10,
-  0x0a13, 0x0a28,
-  0x0a2a, 0x0a30,
-  0x0a32, 0x0a33,
-  0x0a35, 0x0a36,
-  0x0a38, 0x0a39,
-  0x0a3c, 0x0a3c,
-  0x0a3e, 0x0a42,
-  0x0a47, 0x0a48,
-  0x0a4b, 0x0a4d,
-  0x0a59, 0x0a5c,
-  0x0a5e, 0x0a5e,
-  0x0a66, 0x0a74,
-  0x0a81, 0x0a83,
-  0x0a85, 0x0a8d,
-  0x0a8f, 0x0a91,
-  0x0a93, 0x0aa8,
-  0x0aaa, 0x0ab0,
-  0x0ab2, 0x0ab3,
-  0x0ab5, 0x0ab9,
-  0x0abc, 0x0ac5,
-  0x0ac7, 0x0ac9,
-  0x0acb, 0x0acd,
-  0x0ad0, 0x0ad0,
-  0x0ae0, 0x0ae3,
-  0x0ae6, 0x0aef,
-  0x0b01, 0x0b03,
-  0x0b05, 0x0b0c,
-  0x0b0f, 0x0b10,
-  0x0b13, 0x0b28,
-  0x0b2a, 0x0b30,
-  0x0b32, 0x0b33,
-  0x0b35, 0x0b39,
-  0x0b3c, 0x0b43,
-  0x0b47, 0x0b48,
-  0x0b4b, 0x0b4d,
-  0x0b56, 0x0b57,
-  0x0b5c, 0x0b5d,
-  0x0b5f, 0x0b61,
-  0x0b66, 0x0b6f,
-  0x0b71, 0x0b71,
-  0x0b82, 0x0b83,
-  0x0b85, 0x0b8a,
-  0x0b8e, 0x0b90,
-  0x0b92, 0x0b95,
-  0x0b99, 0x0b9a,
-  0x0b9c, 0x0b9c,
-  0x0b9e, 0x0b9f,
-  0x0ba3, 0x0ba4,
-  0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
-  0x0bbe, 0x0bc2,
-  0x0bc6, 0x0bc8,
-  0x0bca, 0x0bcd,
-  0x0bd7, 0x0bd7,
-  0x0be7, 0x0bef,
-  0x0c01, 0x0c03,
-  0x0c05, 0x0c0c,
-  0x0c0e, 0x0c10,
-  0x0c12, 0x0c28,
-  0x0c2a, 0x0c33,
-  0x0c35, 0x0c39,
-  0x0c3e, 0x0c44,
-  0x0c46, 0x0c48,
-  0x0c4a, 0x0c4d,
-  0x0c55, 0x0c56,
-  0x0c60, 0x0c61,
-  0x0c66, 0x0c6f,
-  0x0c82, 0x0c83,
-  0x0c85, 0x0c8c,
-  0x0c8e, 0x0c90,
-  0x0c92, 0x0ca8,
-  0x0caa, 0x0cb3,
-  0x0cb5, 0x0cb9,
-  0x0cbc, 0x0cc4,
-  0x0cc6, 0x0cc8,
-  0x0cca, 0x0ccd,
-  0x0cd5, 0x0cd6,
-  0x0cde, 0x0cde,
-  0x0ce0, 0x0ce1,
-  0x0ce6, 0x0cef,
-  0x0d02, 0x0d03,
-  0x0d05, 0x0d0c,
-  0x0d0e, 0x0d10,
-  0x0d12, 0x0d28,
-  0x0d2a, 0x0d39,
-  0x0d3e, 0x0d43,
-  0x0d46, 0x0d48,
-  0x0d4a, 0x0d4d,
-  0x0d57, 0x0d57,
-  0x0d60, 0x0d61,
-  0x0d66, 0x0d6f,
-  0x0d82, 0x0d83,
-  0x0d85, 0x0d96,
-  0x0d9a, 0x0db1,
-  0x0db3, 0x0dbb,
-  0x0dbd, 0x0dbd,
-  0x0dc0, 0x0dc6,
-  0x0dca, 0x0dca,
-  0x0dcf, 0x0dd4,
-  0x0dd6, 0x0dd6,
-  0x0dd8, 0x0ddf,
-  0x0df2, 0x0df3,
-  0x0e01, 0x0e3a,
-  0x0e40, 0x0e4e,
-  0x0e50, 0x0e59,
-  0x0e81, 0x0e82,
-  0x0e84, 0x0e84,
-  0x0e87, 0x0e88,
-  0x0e8a, 0x0e8a,
-  0x0e8d, 0x0e8d,
-  0x0e94, 0x0e97,
-  0x0e99, 0x0e9f,
-  0x0ea1, 0x0ea3,
-  0x0ea5, 0x0ea5,
-  0x0ea7, 0x0ea7,
-  0x0eaa, 0x0eab,
-  0x0ead, 0x0eb9,
-  0x0ebb, 0x0ebd,
-  0x0ec0, 0x0ec4,
-  0x0ec6, 0x0ec6,
-  0x0ec8, 0x0ecd,
-  0x0ed0, 0x0ed9,
-  0x0edc, 0x0edd,
-  0x0f00, 0x0f00,
-  0x0f18, 0x0f19,
-  0x0f20, 0x0f29,
-  0x0f35, 0x0f35,
-  0x0f37, 0x0f37,
-  0x0f39, 0x0f39,
-  0x0f3e, 0x0f47,
-  0x0f49, 0x0f6a,
-  0x0f71, 0x0f84,
-  0x0f86, 0x0f8b,
-  0x0f90, 0x0f97,
-  0x0f99, 0x0fbc,
-  0x0fc6, 0x0fc6,
-  0x1000, 0x1021,
-  0x1023, 0x1027,
-  0x1029, 0x102a,
-  0x102c, 0x1032,
-  0x1036, 0x1039,
-  0x1040, 0x1049,
-  0x1050, 0x1059,
-  0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x1100, 0x1159,
-  0x115f, 0x11a2,
-  0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
-  0x124a, 0x124d,
-  0x1250, 0x1256,
-  0x1258, 0x1258,
-  0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
-  0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
-  0x12b2, 0x12b5,
-  0x12b8, 0x12be,
-  0x12c0, 0x12c0,
-  0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
-  0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1369, 0x1371,
-  0x13a0, 0x13f4,
-  0x1401, 0x166c,
-  0x166f, 0x1676,
-  0x1681, 0x169a,
-  0x16a0, 0x16ea,
-  0x1700, 0x170c,
-  0x170e, 0x1714,
-  0x1720, 0x1734,
-  0x1740, 0x1753,
-  0x1760, 0x176c,
-  0x176e, 0x1770,
-  0x1772, 0x1773,
-  0x1780, 0x17b3,
-  0x17b6, 0x17d3,
-  0x17d7, 0x17d7,
-  0x17dc, 0x17dd,
-  0x17e0, 0x17e9,
-  0x180b, 0x180d,
-  0x1810, 0x1819,
-  0x1820, 0x1877,
-  0x1880, 0x18a9,
-  0x1900, 0x191c,
-  0x1920, 0x192b,
-  0x1930, 0x193b,
-  0x1946, 0x196d,
-  0x1970, 0x1974,
-  0x1d00, 0x1d6b,
-  0x1e00, 0x1e9b,
-  0x1ea0, 0x1ef9,
-  0x1f00, 0x1f15,
-  0x1f18, 0x1f1d,
-  0x1f20, 0x1f45,
-  0x1f48, 0x1f4d,
-  0x1f50, 0x1f57,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f7d,
-  0x1f80, 0x1fb4,
-  0x1fb6, 0x1fbc,
-  0x1fbe, 0x1fbe,
-  0x1fc2, 0x1fc4,
-  0x1fc6, 0x1fcc,
-  0x1fd0, 0x1fd3,
-  0x1fd6, 0x1fdb,
-  0x1fe0, 0x1fec,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffc,
-  0x2071, 0x2071,
-  0x207f, 0x207f,
-  0x20d0, 0x20ea,
-  0x2102, 0x2102,
-  0x2107, 0x2107,
-  0x210a, 0x2113,
-  0x2115, 0x2115,
-  0x2119, 0x211d,
-  0x2124, 0x2124,
-  0x2126, 0x2126,
-  0x2128, 0x2128,
-  0x212a, 0x212d,
-  0x212f, 0x2131,
-  0x2133, 0x2139,
-  0x213d, 0x213f,
-  0x2145, 0x2149,
-  0x3005, 0x3006,
-  0x302a, 0x302f,
-  0x3031, 0x3035,
-  0x303b, 0x303c,
-  0x3041, 0x3096,
-  0x3099, 0x309a,
-  0x309d, 0x309f,
-  0x30a1, 0x30fa,
-  0x30fc, 0x30ff,
-  0x3105, 0x312c,
-  0x3131, 0x318e,
-  0x31a0, 0x31b7,
-  0x31f0, 0x31ff,
-  0x3400, 0x4db5,
-  0x4e00, 0x9fa5,
-  0xa000, 0xa48c,
-  0xac00, 0xd7a3,
-  0xf900, 0xfa2d,
-  0xfa30, 0xfa6a,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xfb1d, 0xfb28,
-  0xfb2a, 0xfb36,
-  0xfb38, 0xfb3c,
-  0xfb3e, 0xfb3e,
-  0xfb40, 0xfb41,
-  0xfb43, 0xfb44,
-  0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3d,
-  0xfd50, 0xfd8f,
-  0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfb,
-  0xfe00, 0xfe0f,
-  0xfe20, 0xfe23,
-  0xfe70, 0xfe74,
-  0xfe76, 0xfefc,
-  0xff10, 0xff19,
-  0xff21, 0xff3a,
-  0xff41, 0xff5a,
-  0xff66, 0xffbe,
-  0xffc2, 0xffc7,
-  0xffca, 0xffcf,
-  0xffd2, 0xffd7,
-  0xffda, 0xffdc,
-  0x10000, 0x1000b,
-  0x1000d, 0x10026,
-  0x10028, 0x1003a,
-  0x1003c, 0x1003d,
-  0x1003f, 0x1004d,
-  0x10050, 0x1005d,
-  0x10080, 0x100fa,
-  0x10300, 0x1031e,
-  0x10330, 0x10349,
-  0x10380, 0x1039d,
-  0x10400, 0x1049d,
-  0x104a0, 0x104a9,
-  0x10800, 0x10805,
-  0x10808, 0x10808,
-  0x1080a, 0x10835,
-  0x10837, 0x10838,
-  0x1083c, 0x1083c,
-  0x1083f, 0x1083f,
-  0x1d165, 0x1d169,
-  0x1d16d, 0x1d172,
-  0x1d17b, 0x1d182,
-  0x1d185, 0x1d18b,
-  0x1d1aa, 0x1d1ad,
-  0x1d400, 0x1d454,
-  0x1d456, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d51e, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d6c0,
-  0x1d6c2, 0x1d6da,
-  0x1d6dc, 0x1d6fa,
-  0x1d6fc, 0x1d714,
-  0x1d716, 0x1d734,
-  0x1d736, 0x1d74e,
-  0x1d750, 0x1d76e,
-  0x1d770, 0x1d788,
-  0x1d78a, 0x1d7a8,
-  0x1d7aa, 0x1d7c2,
-  0x1d7c4, 0x1d7c9,
-  0x1d7ce, 0x1d7ff,
-  0x20000, 0x2a6d6,
-  0x2f800, 0x2fa1d,
-  0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBAlnum */
-
-static const OnigCodePoint SBAlpha[] = {
-  2,
-  0x0041, 0x005a,
-  0x0061, 0x007a
-};
-
-static const OnigCodePoint MBAlpha[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  394,
-#else
-  6,
-#endif
-  0x00aa, 0x00aa,
-  0x00b5, 0x00b5,
-  0x00ba, 0x00ba,
-  0x00c0, 0x00d6,
-  0x00d8, 0x00f6,
-  0x00f8, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x02c1,
-  0x02c6, 0x02d1,
-  0x02e0, 0x02e4,
-  0x02ee, 0x02ee,
-  0x0300, 0x0357,
-  0x035d, 0x036f,
-  0x037a, 0x037a,
-  0x0386, 0x0386,
-  0x0388, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x03a1,
-  0x03a3, 0x03ce,
-  0x03d0, 0x03f5,
-  0x03f7, 0x03fb,
-  0x0400, 0x0481,
-  0x0483, 0x0486,
-  0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
-  0x0500, 0x050f,
-  0x0531, 0x0556,
-  0x0559, 0x0559,
-  0x0561, 0x0587,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05bd,
-  0x05bf, 0x05bf,
-  0x05c1, 0x05c2,
-  0x05c4, 0x05c4,
-  0x05d0, 0x05ea,
-  0x05f0, 0x05f2,
-  0x0610, 0x0615,
-  0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x066e, 0x06d3,
-  0x06d5, 0x06dc,
-  0x06de, 0x06e8,
-  0x06ea, 0x06ef,
-  0x06fa, 0x06fc,
-  0x06ff, 0x06ff,
-  0x0710, 0x074a,
-  0x074d, 0x074f,
-  0x0780, 0x07b1,
-  0x0901, 0x0939,
-  0x093c, 0x094d,
-  0x0950, 0x0954,
-  0x0958, 0x0963,
-  0x0981, 0x0983,
-  0x0985, 0x098c,
-  0x098f, 0x0990,
-  0x0993, 0x09a8,
-  0x09aa, 0x09b0,
-  0x09b2, 0x09b2,
-  0x09b6, 0x09b9,
-  0x09bc, 0x09c4,
-  0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
-  0x09d7, 0x09d7,
-  0x09dc, 0x09dd,
-  0x09df, 0x09e3,
-  0x09f0, 0x09f1,
-  0x0a01, 0x0a03,
-  0x0a05, 0x0a0a,
-  0x0a0f, 0x0a10,
-  0x0a13, 0x0a28,
-  0x0a2a, 0x0a30,
-  0x0a32, 0x0a33,
-  0x0a35, 0x0a36,
-  0x0a38, 0x0a39,
-  0x0a3c, 0x0a3c,
-  0x0a3e, 0x0a42,
-  0x0a47, 0x0a48,
-  0x0a4b, 0x0a4d,
-  0x0a59, 0x0a5c,
-  0x0a5e, 0x0a5e,
-  0x0a70, 0x0a74,
-  0x0a81, 0x0a83,
-  0x0a85, 0x0a8d,
-  0x0a8f, 0x0a91,
-  0x0a93, 0x0aa8,
-  0x0aaa, 0x0ab0,
-  0x0ab2, 0x0ab3,
-  0x0ab5, 0x0ab9,
-  0x0abc, 0x0ac5,
-  0x0ac7, 0x0ac9,
-  0x0acb, 0x0acd,
-  0x0ad0, 0x0ad0,
-  0x0ae0, 0x0ae3,
-  0x0b01, 0x0b03,
-  0x0b05, 0x0b0c,
-  0x0b0f, 0x0b10,
-  0x0b13, 0x0b28,
-  0x0b2a, 0x0b30,
-  0x0b32, 0x0b33,
-  0x0b35, 0x0b39,
-  0x0b3c, 0x0b43,
-  0x0b47, 0x0b48,
-  0x0b4b, 0x0b4d,
-  0x0b56, 0x0b57,
-  0x0b5c, 0x0b5d,
-  0x0b5f, 0x0b61,
-  0x0b71, 0x0b71,
-  0x0b82, 0x0b83,
-  0x0b85, 0x0b8a,
-  0x0b8e, 0x0b90,
-  0x0b92, 0x0b95,
-  0x0b99, 0x0b9a,
-  0x0b9c, 0x0b9c,
-  0x0b9e, 0x0b9f,
-  0x0ba3, 0x0ba4,
-  0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
-  0x0bbe, 0x0bc2,
-  0x0bc6, 0x0bc8,
-  0x0bca, 0x0bcd,
-  0x0bd7, 0x0bd7,
-  0x0c01, 0x0c03,
-  0x0c05, 0x0c0c,
-  0x0c0e, 0x0c10,
-  0x0c12, 0x0c28,
-  0x0c2a, 0x0c33,
-  0x0c35, 0x0c39,
-  0x0c3e, 0x0c44,
-  0x0c46, 0x0c48,
-  0x0c4a, 0x0c4d,
-  0x0c55, 0x0c56,
-  0x0c60, 0x0c61,
-  0x0c82, 0x0c83,
-  0x0c85, 0x0c8c,
-  0x0c8e, 0x0c90,
-  0x0c92, 0x0ca8,
-  0x0caa, 0x0cb3,
-  0x0cb5, 0x0cb9,
-  0x0cbc, 0x0cc4,
-  0x0cc6, 0x0cc8,
-  0x0cca, 0x0ccd,
-  0x0cd5, 0x0cd6,
-  0x0cde, 0x0cde,
-  0x0ce0, 0x0ce1,
-  0x0d02, 0x0d03,
-  0x0d05, 0x0d0c,
-  0x0d0e, 0x0d10,
-  0x0d12, 0x0d28,
-  0x0d2a, 0x0d39,
-  0x0d3e, 0x0d43,
-  0x0d46, 0x0d48,
-  0x0d4a, 0x0d4d,
-  0x0d57, 0x0d57,
-  0x0d60, 0x0d61,
-  0x0d82, 0x0d83,
-  0x0d85, 0x0d96,
-  0x0d9a, 0x0db1,
-  0x0db3, 0x0dbb,
-  0x0dbd, 0x0dbd,
-  0x0dc0, 0x0dc6,
-  0x0dca, 0x0dca,
-  0x0dcf, 0x0dd4,
-  0x0dd6, 0x0dd6,
-  0x0dd8, 0x0ddf,
-  0x0df2, 0x0df3,
-  0x0e01, 0x0e3a,
-  0x0e40, 0x0e4e,
-  0x0e81, 0x0e82,
-  0x0e84, 0x0e84,
-  0x0e87, 0x0e88,
-  0x0e8a, 0x0e8a,
-  0x0e8d, 0x0e8d,
-  0x0e94, 0x0e97,
-  0x0e99, 0x0e9f,
-  0x0ea1, 0x0ea3,
-  0x0ea5, 0x0ea5,
-  0x0ea7, 0x0ea7,
-  0x0eaa, 0x0eab,
-  0x0ead, 0x0eb9,
-  0x0ebb, 0x0ebd,
-  0x0ec0, 0x0ec4,
-  0x0ec6, 0x0ec6,
-  0x0ec8, 0x0ecd,
-  0x0edc, 0x0edd,
-  0x0f00, 0x0f00,
-  0x0f18, 0x0f19,
-  0x0f35, 0x0f35,
-  0x0f37, 0x0f37,
-  0x0f39, 0x0f39,
-  0x0f3e, 0x0f47,
-  0x0f49, 0x0f6a,
-  0x0f71, 0x0f84,
-  0x0f86, 0x0f8b,
-  0x0f90, 0x0f97,
-  0x0f99, 0x0fbc,
-  0x0fc6, 0x0fc6,
-  0x1000, 0x1021,
-  0x1023, 0x1027,
-  0x1029, 0x102a,
-  0x102c, 0x1032,
-  0x1036, 0x1039,
-  0x1050, 0x1059,
-  0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x1100, 0x1159,
-  0x115f, 0x11a2,
-  0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
-  0x124a, 0x124d,
-  0x1250, 0x1256,
-  0x1258, 0x1258,
-  0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
-  0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
-  0x12b2, 0x12b5,
-  0x12b8, 0x12be,
-  0x12c0, 0x12c0,
-  0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
-  0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x13a0, 0x13f4,
-  0x1401, 0x166c,
-  0x166f, 0x1676,
-  0x1681, 0x169a,
-  0x16a0, 0x16ea,
-  0x1700, 0x170c,
-  0x170e, 0x1714,
-  0x1720, 0x1734,
-  0x1740, 0x1753,
-  0x1760, 0x176c,
-  0x176e, 0x1770,
-  0x1772, 0x1773,
-  0x1780, 0x17b3,
-  0x17b6, 0x17d3,
-  0x17d7, 0x17d7,
-  0x17dc, 0x17dd,
-  0x180b, 0x180d,
-  0x1820, 0x1877,
-  0x1880, 0x18a9,
-  0x1900, 0x191c,
-  0x1920, 0x192b,
-  0x1930, 0x193b,
-  0x1950, 0x196d,
-  0x1970, 0x1974,
-  0x1d00, 0x1d6b,
-  0x1e00, 0x1e9b,
-  0x1ea0, 0x1ef9,
-  0x1f00, 0x1f15,
-  0x1f18, 0x1f1d,
-  0x1f20, 0x1f45,
-  0x1f48, 0x1f4d,
-  0x1f50, 0x1f57,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f7d,
-  0x1f80, 0x1fb4,
-  0x1fb6, 0x1fbc,
-  0x1fbe, 0x1fbe,
-  0x1fc2, 0x1fc4,
-  0x1fc6, 0x1fcc,
-  0x1fd0, 0x1fd3,
-  0x1fd6, 0x1fdb,
-  0x1fe0, 0x1fec,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffc,
-  0x2071, 0x2071,
-  0x207f, 0x207f,
-  0x20d0, 0x20ea,
-  0x2102, 0x2102,
-  0x2107, 0x2107,
-  0x210a, 0x2113,
-  0x2115, 0x2115,
-  0x2119, 0x211d,
-  0x2124, 0x2124,
-  0x2126, 0x2126,
-  0x2128, 0x2128,
-  0x212a, 0x212d,
-  0x212f, 0x2131,
-  0x2133, 0x2139,
-  0x213d, 0x213f,
-  0x2145, 0x2149,
-  0x3005, 0x3006,
-  0x302a, 0x302f,
-  0x3031, 0x3035,
-  0x303b, 0x303c,
-  0x3041, 0x3096,
-  0x3099, 0x309a,
-  0x309d, 0x309f,
-  0x30a1, 0x30fa,
-  0x30fc, 0x30ff,
-  0x3105, 0x312c,
-  0x3131, 0x318e,
-  0x31a0, 0x31b7,
-  0x31f0, 0x31ff,
-  0x3400, 0x4db5,
-  0x4e00, 0x9fa5,
-  0xa000, 0xa48c,
-  0xac00, 0xd7a3,
-  0xf900, 0xfa2d,
-  0xfa30, 0xfa6a,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xfb1d, 0xfb28,
-  0xfb2a, 0xfb36,
-  0xfb38, 0xfb3c,
-  0xfb3e, 0xfb3e,
-  0xfb40, 0xfb41,
-  0xfb43, 0xfb44,
-  0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3d,
-  0xfd50, 0xfd8f,
-  0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfb,
-  0xfe00, 0xfe0f,
-  0xfe20, 0xfe23,
-  0xfe70, 0xfe74,
-  0xfe76, 0xfefc,
-  0xff21, 0xff3a,
-  0xff41, 0xff5a,
-  0xff66, 0xffbe,
-  0xffc2, 0xffc7,
-  0xffca, 0xffcf,
-  0xffd2, 0xffd7,
-  0xffda, 0xffdc,
-  0x10000, 0x1000b,
-  0x1000d, 0x10026,
-  0x10028, 0x1003a,
-  0x1003c, 0x1003d,
-  0x1003f, 0x1004d,
-  0x10050, 0x1005d,
-  0x10080, 0x100fa,
-  0x10300, 0x1031e,
-  0x10330, 0x10349,
-  0x10380, 0x1039d,
-  0x10400, 0x1049d,
-  0x10800, 0x10805,
-  0x10808, 0x10808,
-  0x1080a, 0x10835,
-  0x10837, 0x10838,
-  0x1083c, 0x1083c,
-  0x1083f, 0x1083f,
-  0x1d165, 0x1d169,
-  0x1d16d, 0x1d172,
-  0x1d17b, 0x1d182,
-  0x1d185, 0x1d18b,
-  0x1d1aa, 0x1d1ad,
-  0x1d400, 0x1d454,
-  0x1d456, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d51e, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d6c0,
-  0x1d6c2, 0x1d6da,
-  0x1d6dc, 0x1d6fa,
-  0x1d6fc, 0x1d714,
-  0x1d716, 0x1d734,
-  0x1d736, 0x1d74e,
-  0x1d750, 0x1d76e,
-  0x1d770, 0x1d788,
-  0x1d78a, 0x1d7a8,
-  0x1d7aa, 0x1d7c2,
-  0x1d7c4, 0x1d7c9,
-  0x20000, 0x2a6d6,
-  0x2f800, 0x2fa1d,
-  0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBAlpha */
-
-static const OnigCodePoint SBBlank[] = {
-  2,
-  0x0009, 0x0009,
-  0x0020, 0x0020
-};
-
-static const OnigCodePoint MBBlank[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  7,
-#else
-  1,
-#endif
-  0x00a0, 0x00a0
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x1680, 0x1680,
-  0x180e, 0x180e,
-  0x2000, 0x200a,
-  0x202f, 0x202f,
-  0x205f, 0x205f,
-  0x3000, 0x3000
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBBlank */
-
-static const OnigCodePoint SBCntrl[] = {
-  2,
-  0x0000, 0x001f,
-  0x007f, 0x007f
-};
-
-static const OnigCodePoint MBCntrl[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  18,
-#else
-  2,
-#endif
-  0x0080, 0x009f,
-  0x00ad, 0x00ad
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0600, 0x0603,
-  0x06dd, 0x06dd,
-  0x070f, 0x070f,
-  0x17b4, 0x17b5,
-  0x200b, 0x200f,
-  0x202a, 0x202e,
-  0x2060, 0x2063,
-  0x206a, 0x206f,
-  0xd800, 0xf8ff,
-  0xfeff, 0xfeff,
-  0xfff9, 0xfffb,
-  0x1d173, 0x1d17a,
-  0xe0001, 0xe0001,
-  0xe0020, 0xe007f,
-  0xf0000, 0xffffd,
-  0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBCntrl */
-
-static const OnigCodePoint SBDigit[] = {
-  1,
-  0x0030, 0x0039
-};
-
-static const OnigCodePoint MBDigit[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  22,
-#else
-  0
-#endif
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  0x0660, 0x0669,
-  0x06f0, 0x06f9,
-  0x0966, 0x096f,
-  0x09e6, 0x09ef,
-  0x0a66, 0x0a6f,
-  0x0ae6, 0x0aef,
-  0x0b66, 0x0b6f,
-  0x0be7, 0x0bef,
-  0x0c66, 0x0c6f,
-  0x0ce6, 0x0cef,
-  0x0d66, 0x0d6f,
-  0x0e50, 0x0e59,
-  0x0ed0, 0x0ed9,
-  0x0f20, 0x0f29,
-  0x1040, 0x1049,
-  0x1369, 0x1371,
-  0x17e0, 0x17e9,
-  0x1810, 0x1819,
-  0x1946, 0x194f,
-  0xff10, 0xff19,
-  0x104a0, 0x104a9,
-  0x1d7ce, 0x1d7ff
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBDigit */
-
-static const OnigCodePoint SBGraph[] = {
-  1,
-  0x0021, 0x007e
-};
-
-static const OnigCodePoint MBGraph[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  404,
-#else
-  1,
-#endif
-  0x00a1, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x0357,
-  0x035d, 0x036f,
-  0x0374, 0x0375,
-  0x037a, 0x037a,
-  0x037e, 0x037e,
-  0x0384, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x03a1,
-  0x03a3, 0x03ce,
-  0x03d0, 0x03fb,
-  0x0400, 0x0486,
-  0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
-  0x0500, 0x050f,
-  0x0531, 0x0556,
-  0x0559, 0x055f,
-  0x0561, 0x0587,
-  0x0589, 0x058a,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05c4,
-  0x05d0, 0x05ea,
-  0x05f0, 0x05f4,
-  0x0600, 0x0603,
-  0x060c, 0x0615,
-  0x061b, 0x061b,
-  0x061f, 0x061f,
-  0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x0660, 0x070d,
-  0x070f, 0x074a,
-  0x074d, 0x074f,
-  0x0780, 0x07b1,
-  0x0901, 0x0939,
-  0x093c, 0x094d,
-  0x0950, 0x0954,
-  0x0958, 0x0970,
-  0x0981, 0x0983,
-  0x0985, 0x098c,
-  0x098f, 0x0990,
-  0x0993, 0x09a8,
-  0x09aa, 0x09b0,
-  0x09b2, 0x09b2,
-  0x09b6, 0x09b9,
-  0x09bc, 0x09c4,
-  0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
-  0x09d7, 0x09d7,
-  0x09dc, 0x09dd,
-  0x09df, 0x09e3,
-  0x09e6, 0x09fa,
-  0x0a01, 0x0a03,
-  0x0a05, 0x0a0a,
-  0x0a0f, 0x0a10,
-  0x0a13, 0x0a28,
-  0x0a2a, 0x0a30,
-  0x0a32, 0x0a33,
-  0x0a35, 0x0a36,
-  0x0a38, 0x0a39,
-  0x0a3c, 0x0a3c,
-  0x0a3e, 0x0a42,
-  0x0a47, 0x0a48,
-  0x0a4b, 0x0a4d,
-  0x0a59, 0x0a5c,
-  0x0a5e, 0x0a5e,
-  0x0a66, 0x0a74,
-  0x0a81, 0x0a83,
-  0x0a85, 0x0a8d,
-  0x0a8f, 0x0a91,
-  0x0a93, 0x0aa8,
-  0x0aaa, 0x0ab0,
-  0x0ab2, 0x0ab3,
-  0x0ab5, 0x0ab9,
-  0x0abc, 0x0ac5,
-  0x0ac7, 0x0ac9,
-  0x0acb, 0x0acd,
-  0x0ad0, 0x0ad0,
-  0x0ae0, 0x0ae3,
-  0x0ae6, 0x0aef,
-  0x0af1, 0x0af1,
-  0x0b01, 0x0b03,
-  0x0b05, 0x0b0c,
-  0x0b0f, 0x0b10,
-  0x0b13, 0x0b28,
-  0x0b2a, 0x0b30,
-  0x0b32, 0x0b33,
-  0x0b35, 0x0b39,
-  0x0b3c, 0x0b43,
-  0x0b47, 0x0b48,
-  0x0b4b, 0x0b4d,
-  0x0b56, 0x0b57,
-  0x0b5c, 0x0b5d,
-  0x0b5f, 0x0b61,
-  0x0b66, 0x0b71,
-  0x0b82, 0x0b83,
-  0x0b85, 0x0b8a,
-  0x0b8e, 0x0b90,
-  0x0b92, 0x0b95,
-  0x0b99, 0x0b9a,
-  0x0b9c, 0x0b9c,
-  0x0b9e, 0x0b9f,
-  0x0ba3, 0x0ba4,
-  0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
-  0x0bbe, 0x0bc2,
-  0x0bc6, 0x0bc8,
-  0x0bca, 0x0bcd,
-  0x0bd7, 0x0bd7,
-  0x0be7, 0x0bfa,
-  0x0c01, 0x0c03,
-  0x0c05, 0x0c0c,
-  0x0c0e, 0x0c10,
-  0x0c12, 0x0c28,
-  0x0c2a, 0x0c33,
-  0x0c35, 0x0c39,
-  0x0c3e, 0x0c44,
-  0x0c46, 0x0c48,
-  0x0c4a, 0x0c4d,
-  0x0c55, 0x0c56,
-  0x0c60, 0x0c61,
-  0x0c66, 0x0c6f,
-  0x0c82, 0x0c83,
-  0x0c85, 0x0c8c,
-  0x0c8e, 0x0c90,
-  0x0c92, 0x0ca8,
-  0x0caa, 0x0cb3,
-  0x0cb5, 0x0cb9,
-  0x0cbc, 0x0cc4,
-  0x0cc6, 0x0cc8,
-  0x0cca, 0x0ccd,
-  0x0cd5, 0x0cd6,
-  0x0cde, 0x0cde,
-  0x0ce0, 0x0ce1,
-  0x0ce6, 0x0cef,
-  0x0d02, 0x0d03,
-  0x0d05, 0x0d0c,
-  0x0d0e, 0x0d10,
-  0x0d12, 0x0d28,
-  0x0d2a, 0x0d39,
-  0x0d3e, 0x0d43,
-  0x0d46, 0x0d48,
-  0x0d4a, 0x0d4d,
-  0x0d57, 0x0d57,
-  0x0d60, 0x0d61,
-  0x0d66, 0x0d6f,
-  0x0d82, 0x0d83,
-  0x0d85, 0x0d96,
-  0x0d9a, 0x0db1,
-  0x0db3, 0x0dbb,
-  0x0dbd, 0x0dbd,
-  0x0dc0, 0x0dc6,
-  0x0dca, 0x0dca,
-  0x0dcf, 0x0dd4,
-  0x0dd6, 0x0dd6,
-  0x0dd8, 0x0ddf,
-  0x0df2, 0x0df4,
-  0x0e01, 0x0e3a,
-  0x0e3f, 0x0e5b,
-  0x0e81, 0x0e82,
-  0x0e84, 0x0e84,
-  0x0e87, 0x0e88,
-  0x0e8a, 0x0e8a,
-  0x0e8d, 0x0e8d,
-  0x0e94, 0x0e97,
-  0x0e99, 0x0e9f,
-  0x0ea1, 0x0ea3,
-  0x0ea5, 0x0ea5,
-  0x0ea7, 0x0ea7,
-  0x0eaa, 0x0eab,
-  0x0ead, 0x0eb9,
-  0x0ebb, 0x0ebd,
-  0x0ec0, 0x0ec4,
-  0x0ec6, 0x0ec6,
-  0x0ec8, 0x0ecd,
-  0x0ed0, 0x0ed9,
-  0x0edc, 0x0edd,
-  0x0f00, 0x0f47,
-  0x0f49, 0x0f6a,
-  0x0f71, 0x0f8b,
-  0x0f90, 0x0f97,
-  0x0f99, 0x0fbc,
-  0x0fbe, 0x0fcc,
-  0x0fcf, 0x0fcf,
-  0x1000, 0x1021,
-  0x1023, 0x1027,
-  0x1029, 0x102a,
-  0x102c, 0x1032,
-  0x1036, 0x1039,
-  0x1040, 0x1059,
-  0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x10fb, 0x10fb,
-  0x1100, 0x1159,
-  0x115f, 0x11a2,
-  0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
-  0x124a, 0x124d,
-  0x1250, 0x1256,
-  0x1258, 0x1258,
-  0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
-  0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
-  0x12b2, 0x12b5,
-  0x12b8, 0x12be,
-  0x12c0, 0x12c0,
-  0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
-  0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1361, 0x137c,
-  0x13a0, 0x13f4,
-  0x1401, 0x1676,
-  0x1681, 0x169c,
-  0x16a0, 0x16f0,
-  0x1700, 0x170c,
-  0x170e, 0x1714,
-  0x1720, 0x1736,
-  0x1740, 0x1753,
-  0x1760, 0x176c,
-  0x176e, 0x1770,
-  0x1772, 0x1773,
-  0x1780, 0x17dd,
-  0x17e0, 0x17e9,
-  0x17f0, 0x17f9,
-  0x1800, 0x180d,
-  0x1810, 0x1819,
-  0x1820, 0x1877,
-  0x1880, 0x18a9,
-  0x1900, 0x191c,
-  0x1920, 0x192b,
-  0x1930, 0x193b,
-  0x1940, 0x1940,
-  0x1944, 0x196d,
-  0x1970, 0x1974,
-  0x19e0, 0x19ff,
-  0x1d00, 0x1d6b,
-  0x1e00, 0x1e9b,
-  0x1ea0, 0x1ef9,
-  0x1f00, 0x1f15,
-  0x1f18, 0x1f1d,
-  0x1f20, 0x1f45,
-  0x1f48, 0x1f4d,
-  0x1f50, 0x1f57,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f7d,
-  0x1f80, 0x1fb4,
-  0x1fb6, 0x1fc4,
-  0x1fc6, 0x1fd3,
-  0x1fd6, 0x1fdb,
-  0x1fdd, 0x1fef,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffe,
-  0x200b, 0x2027,
-  0x202a, 0x202e,
-  0x2030, 0x2054,
-  0x2057, 0x2057,
-  0x2060, 0x2063,
-  0x206a, 0x2071,
-  0x2074, 0x208e,
-  0x20a0, 0x20b1,
-  0x20d0, 0x20ea,
-  0x2100, 0x213b,
-  0x213d, 0x214b,
-  0x2153, 0x2183,
-  0x2190, 0x23d0,
-  0x2400, 0x2426,
-  0x2440, 0x244a,
-  0x2460, 0x2617,
-  0x2619, 0x267d,
-  0x2680, 0x2691,
-  0x26a0, 0x26a1,
-  0x2701, 0x2704,
-  0x2706, 0x2709,
-  0x270c, 0x2727,
-  0x2729, 0x274b,
-  0x274d, 0x274d,
-  0x274f, 0x2752,
-  0x2756, 0x2756,
-  0x2758, 0x275e,
-  0x2761, 0x2794,
-  0x2798, 0x27af,
-  0x27b1, 0x27be,
-  0x27d0, 0x27eb,
-  0x27f0, 0x2b0d,
-  0x2e80, 0x2e99,
-  0x2e9b, 0x2ef3,
-  0x2f00, 0x2fd5,
-  0x2ff0, 0x2ffb,
-  0x3001, 0x303f,
-  0x3041, 0x3096,
-  0x3099, 0x30ff,
-  0x3105, 0x312c,
-  0x3131, 0x318e,
-  0x3190, 0x31b7,
-  0x31f0, 0x321e,
-  0x3220, 0x3243,
-  0x3250, 0x327d,
-  0x327f, 0x32fe,
-  0x3300, 0x4db5,
-  0x4dc0, 0x9fa5,
-  0xa000, 0xa48c,
-  0xa490, 0xa4c6,
-  0xac00, 0xd7a3,
-  0xe000, 0xfa2d,
-  0xfa30, 0xfa6a,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xfb1d, 0xfb36,
-  0xfb38, 0xfb3c,
-  0xfb3e, 0xfb3e,
-  0xfb40, 0xfb41,
-  0xfb43, 0xfb44,
-  0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3f,
-  0xfd50, 0xfd8f,
-  0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfd,
-  0xfe00, 0xfe0f,
-  0xfe20, 0xfe23,
-  0xfe30, 0xfe52,
-  0xfe54, 0xfe66,
-  0xfe68, 0xfe6b,
-  0xfe70, 0xfe74,
-  0xfe76, 0xfefc,
-  0xfeff, 0xfeff,
-  0xff01, 0xffbe,
-  0xffc2, 0xffc7,
-  0xffca, 0xffcf,
-  0xffd2, 0xffd7,
-  0xffda, 0xffdc,
-  0xffe0, 0xffe6,
-  0xffe8, 0xffee,
-  0xfff9, 0xfffd,
-  0x10000, 0x1000b,
-  0x1000d, 0x10026,
-  0x10028, 0x1003a,
-  0x1003c, 0x1003d,
-  0x1003f, 0x1004d,
-  0x10050, 0x1005d,
-  0x10080, 0x100fa,
-  0x10100, 0x10102,
-  0x10107, 0x10133,
-  0x10137, 0x1013f,
-  0x10300, 0x1031e,
-  0x10320, 0x10323,
-  0x10330, 0x1034a,
-  0x10380, 0x1039d,
-  0x1039f, 0x1039f,
-  0x10400, 0x1049d,
-  0x104a0, 0x104a9,
-  0x10800, 0x10805,
-  0x10808, 0x10808,
-  0x1080a, 0x10835,
-  0x10837, 0x10838,
-  0x1083c, 0x1083c,
-  0x1083f, 0x1083f,
-  0x1d000, 0x1d0f5,
-  0x1d100, 0x1d126,
-  0x1d12a, 0x1d1dd,
-  0x1d300, 0x1d356,
-  0x1d400, 0x1d454,
-  0x1d456, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d51e, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d7c9,
-  0x1d7ce, 0x1d7ff,
-  0x20000, 0x2a6d6,
-  0x2f800, 0x2fa1d,
-  0xe0001, 0xe0001,
-  0xe0020, 0xe007f,
-  0xe0100, 0xe01ef,
-  0xf0000, 0xffffd,
-  0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBGraph */
-
-static const OnigCodePoint SBLower[] = {
-  1,
-  0x0061, 0x007a
-};
-
-static const OnigCodePoint MBLower[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  423,
-#else
-  5,
-#endif
-  0x00aa, 0x00aa,
-  0x00b5, 0x00b5,
-  0x00ba, 0x00ba,
-  0x00df, 0x00f6,
-  0x00f8, 0x00ff
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0101, 0x0101,
-  0x0103, 0x0103,
-  0x0105, 0x0105,
-  0x0107, 0x0107,
-  0x0109, 0x0109,
-  0x010b, 0x010b,
-  0x010d, 0x010d,
-  0x010f, 0x010f,
-  0x0111, 0x0111,
-  0x0113, 0x0113,
-  0x0115, 0x0115,
-  0x0117, 0x0117,
-  0x0119, 0x0119,
-  0x011b, 0x011b,
-  0x011d, 0x011d,
-  0x011f, 0x011f,
-  0x0121, 0x0121,
-  0x0123, 0x0123,
-  0x0125, 0x0125,
-  0x0127, 0x0127,
-  0x0129, 0x0129,
-  0x012b, 0x012b,
-  0x012d, 0x012d,
-  0x012f, 0x012f,
-  0x0131, 0x0131,
-  0x0133, 0x0133,
-  0x0135, 0x0135,
-  0x0137, 0x0138,
-  0x013a, 0x013a,
-  0x013c, 0x013c,
-  0x013e, 0x013e,
-  0x0140, 0x0140,
-  0x0142, 0x0142,
-  0x0144, 0x0144,
-  0x0146, 0x0146,
-  0x0148, 0x0149,
-  0x014b, 0x014b,
-  0x014d, 0x014d,
-  0x014f, 0x014f,
-  0x0151, 0x0151,
-  0x0153, 0x0153,
-  0x0155, 0x0155,
-  0x0157, 0x0157,
-  0x0159, 0x0159,
-  0x015b, 0x015b,
-  0x015d, 0x015d,
-  0x015f, 0x015f,
-  0x0161, 0x0161,
-  0x0163, 0x0163,
-  0x0165, 0x0165,
-  0x0167, 0x0167,
-  0x0169, 0x0169,
-  0x016b, 0x016b,
-  0x016d, 0x016d,
-  0x016f, 0x016f,
-  0x0171, 0x0171,
-  0x0173, 0x0173,
-  0x0175, 0x0175,
-  0x0177, 0x0177,
-  0x017a, 0x017a,
-  0x017c, 0x017c,
-  0x017e, 0x0180,
-  0x0183, 0x0183,
-  0x0185, 0x0185,
-  0x0188, 0x0188,
-  0x018c, 0x018d,
-  0x0192, 0x0192,
-  0x0195, 0x0195,
-  0x0199, 0x019b,
-  0x019e, 0x019e,
-  0x01a1, 0x01a1,
-  0x01a3, 0x01a3,
-  0x01a5, 0x01a5,
-  0x01a8, 0x01a8,
-  0x01aa, 0x01ab,
-  0x01ad, 0x01ad,
-  0x01b0, 0x01b0,
-  0x01b4, 0x01b4,
-  0x01b6, 0x01b6,
-  0x01b9, 0x01ba,
-  0x01bd, 0x01bf,
-  0x01c6, 0x01c6,
-  0x01c9, 0x01c9,
-  0x01cc, 0x01cc,
-  0x01ce, 0x01ce,
-  0x01d0, 0x01d0,
-  0x01d2, 0x01d2,
-  0x01d4, 0x01d4,
-  0x01d6, 0x01d6,
-  0x01d8, 0x01d8,
-  0x01da, 0x01da,
-  0x01dc, 0x01dd,
-  0x01df, 0x01df,
-  0x01e1, 0x01e1,
-  0x01e3, 0x01e3,
-  0x01e5, 0x01e5,
-  0x01e7, 0x01e7,
-  0x01e9, 0x01e9,
-  0x01eb, 0x01eb,
-  0x01ed, 0x01ed,
-  0x01ef, 0x01f0,
-  0x01f3, 0x01f3,
-  0x01f5, 0x01f5,
-  0x01f9, 0x01f9,
-  0x01fb, 0x01fb,
-  0x01fd, 0x01fd,
-  0x01ff, 0x01ff,
-  0x0201, 0x0201,
-  0x0203, 0x0203,
-  0x0205, 0x0205,
-  0x0207, 0x0207,
-  0x0209, 0x0209,
-  0x020b, 0x020b,
-  0x020d, 0x020d,
-  0x020f, 0x020f,
-  0x0211, 0x0211,
-  0x0213, 0x0213,
-  0x0215, 0x0215,
-  0x0217, 0x0217,
-  0x0219, 0x0219,
-  0x021b, 0x021b,
-  0x021d, 0x021d,
-  0x021f, 0x021f,
-  0x0221, 0x0221,
-  0x0223, 0x0223,
-  0x0225, 0x0225,
-  0x0227, 0x0227,
-  0x0229, 0x0229,
-  0x022b, 0x022b,
-  0x022d, 0x022d,
-  0x022f, 0x022f,
-  0x0231, 0x0231,
-  0x0233, 0x0236,
-  0x0250, 0x02af,
-  0x0390, 0x0390,
-  0x03ac, 0x03ce,
-  0x03d0, 0x03d1,
-  0x03d5, 0x03d7,
-  0x03d9, 0x03d9,
-  0x03db, 0x03db,
-  0x03dd, 0x03dd,
-  0x03df, 0x03df,
-  0x03e1, 0x03e1,
-  0x03e3, 0x03e3,
-  0x03e5, 0x03e5,
-  0x03e7, 0x03e7,
-  0x03e9, 0x03e9,
-  0x03eb, 0x03eb,
-  0x03ed, 0x03ed,
-  0x03ef, 0x03f3,
-  0x03f5, 0x03f5,
-  0x03f8, 0x03f8,
-  0x03fb, 0x03fb,
-  0x0430, 0x045f,
-  0x0461, 0x0461,
-  0x0463, 0x0463,
-  0x0465, 0x0465,
-  0x0467, 0x0467,
-  0x0469, 0x0469,
-  0x046b, 0x046b,
-  0x046d, 0x046d,
-  0x046f, 0x046f,
-  0x0471, 0x0471,
-  0x0473, 0x0473,
-  0x0475, 0x0475,
-  0x0477, 0x0477,
-  0x0479, 0x0479,
-  0x047b, 0x047b,
-  0x047d, 0x047d,
-  0x047f, 0x047f,
-  0x0481, 0x0481,
-  0x048b, 0x048b,
-  0x048d, 0x048d,
-  0x048f, 0x048f,
-  0x0491, 0x0491,
-  0x0493, 0x0493,
-  0x0495, 0x0495,
-  0x0497, 0x0497,
-  0x0499, 0x0499,
-  0x049b, 0x049b,
-  0x049d, 0x049d,
-  0x049f, 0x049f,
-  0x04a1, 0x04a1,
-  0x04a3, 0x04a3,
-  0x04a5, 0x04a5,
-  0x04a7, 0x04a7,
-  0x04a9, 0x04a9,
-  0x04ab, 0x04ab,
-  0x04ad, 0x04ad,
-  0x04af, 0x04af,
-  0x04b1, 0x04b1,
-  0x04b3, 0x04b3,
-  0x04b5, 0x04b5,
-  0x04b7, 0x04b7,
-  0x04b9, 0x04b9,
-  0x04bb, 0x04bb,
-  0x04bd, 0x04bd,
-  0x04bf, 0x04bf,
-  0x04c2, 0x04c2,
-  0x04c4, 0x04c4,
-  0x04c6, 0x04c6,
-  0x04c8, 0x04c8,
-  0x04ca, 0x04ca,
-  0x04cc, 0x04cc,
-  0x04ce, 0x04ce,
-  0x04d1, 0x04d1,
-  0x04d3, 0x04d3,
-  0x04d5, 0x04d5,
-  0x04d7, 0x04d7,
-  0x04d9, 0x04d9,
-  0x04db, 0x04db,
-  0x04dd, 0x04dd,
-  0x04df, 0x04df,
-  0x04e1, 0x04e1,
-  0x04e3, 0x04e3,
-  0x04e5, 0x04e5,
-  0x04e7, 0x04e7,
-  0x04e9, 0x04e9,
-  0x04eb, 0x04eb,
-  0x04ed, 0x04ed,
-  0x04ef, 0x04ef,
-  0x04f1, 0x04f1,
-  0x04f3, 0x04f3,
-  0x04f5, 0x04f5,
-  0x04f9, 0x04f9,
-  0x0501, 0x0501,
-  0x0503, 0x0503,
-  0x0505, 0x0505,
-  0x0507, 0x0507,
-  0x0509, 0x0509,
-  0x050b, 0x050b,
-  0x050d, 0x050d,
-  0x050f, 0x050f,
-  0x0561, 0x0587,
-  0x1d00, 0x1d2b,
-  0x1d62, 0x1d6b,
-  0x1e01, 0x1e01,
-  0x1e03, 0x1e03,
-  0x1e05, 0x1e05,
-  0x1e07, 0x1e07,
-  0x1e09, 0x1e09,
-  0x1e0b, 0x1e0b,
-  0x1e0d, 0x1e0d,
-  0x1e0f, 0x1e0f,
-  0x1e11, 0x1e11,
-  0x1e13, 0x1e13,
-  0x1e15, 0x1e15,
-  0x1e17, 0x1e17,
-  0x1e19, 0x1e19,
-  0x1e1b, 0x1e1b,
-  0x1e1d, 0x1e1d,
-  0x1e1f, 0x1e1f,
-  0x1e21, 0x1e21,
-  0x1e23, 0x1e23,
-  0x1e25, 0x1e25,
-  0x1e27, 0x1e27,
-  0x1e29, 0x1e29,
-  0x1e2b, 0x1e2b,
-  0x1e2d, 0x1e2d,
-  0x1e2f, 0x1e2f,
-  0x1e31, 0x1e31,
-  0x1e33, 0x1e33,
-  0x1e35, 0x1e35,
-  0x1e37, 0x1e37,
-  0x1e39, 0x1e39,
-  0x1e3b, 0x1e3b,
-  0x1e3d, 0x1e3d,
-  0x1e3f, 0x1e3f,
-  0x1e41, 0x1e41,
-  0x1e43, 0x1e43,
-  0x1e45, 0x1e45,
-  0x1e47, 0x1e47,
-  0x1e49, 0x1e49,
-  0x1e4b, 0x1e4b,
-  0x1e4d, 0x1e4d,
-  0x1e4f, 0x1e4f,
-  0x1e51, 0x1e51,
-  0x1e53, 0x1e53,
-  0x1e55, 0x1e55,
-  0x1e57, 0x1e57,
-  0x1e59, 0x1e59,
-  0x1e5b, 0x1e5b,
-  0x1e5d, 0x1e5d,
-  0x1e5f, 0x1e5f,
-  0x1e61, 0x1e61,
-  0x1e63, 0x1e63,
-  0x1e65, 0x1e65,
-  0x1e67, 0x1e67,
-  0x1e69, 0x1e69,
-  0x1e6b, 0x1e6b,
-  0x1e6d, 0x1e6d,
-  0x1e6f, 0x1e6f,
-  0x1e71, 0x1e71,
-  0x1e73, 0x1e73,
-  0x1e75, 0x1e75,
-  0x1e77, 0x1e77,
-  0x1e79, 0x1e79,
-  0x1e7b, 0x1e7b,
-  0x1e7d, 0x1e7d,
-  0x1e7f, 0x1e7f,
-  0x1e81, 0x1e81,
-  0x1e83, 0x1e83,
-  0x1e85, 0x1e85,
-  0x1e87, 0x1e87,
-  0x1e89, 0x1e89,
-  0x1e8b, 0x1e8b,
-  0x1e8d, 0x1e8d,
-  0x1e8f, 0x1e8f,
-  0x1e91, 0x1e91,
-  0x1e93, 0x1e93,
-  0x1e95, 0x1e9b,
-  0x1ea1, 0x1ea1,
-  0x1ea3, 0x1ea3,
-  0x1ea5, 0x1ea5,
-  0x1ea7, 0x1ea7,
-  0x1ea9, 0x1ea9,
-  0x1eab, 0x1eab,
-  0x1ead, 0x1ead,
-  0x1eaf, 0x1eaf,
-  0x1eb1, 0x1eb1,
-  0x1eb3, 0x1eb3,
-  0x1eb5, 0x1eb5,
-  0x1eb7, 0x1eb7,
-  0x1eb9, 0x1eb9,
-  0x1ebb, 0x1ebb,
-  0x1ebd, 0x1ebd,
-  0x1ebf, 0x1ebf,
-  0x1ec1, 0x1ec1,
-  0x1ec3, 0x1ec3,
-  0x1ec5, 0x1ec5,
-  0x1ec7, 0x1ec7,
-  0x1ec9, 0x1ec9,
-  0x1ecb, 0x1ecb,
-  0x1ecd, 0x1ecd,
-  0x1ecf, 0x1ecf,
-  0x1ed1, 0x1ed1,
-  0x1ed3, 0x1ed3,
-  0x1ed5, 0x1ed5,
-  0x1ed7, 0x1ed7,
-  0x1ed9, 0x1ed9,
-  0x1edb, 0x1edb,
-  0x1edd, 0x1edd,
-  0x1edf, 0x1edf,
-  0x1ee1, 0x1ee1,
-  0x1ee3, 0x1ee3,
-  0x1ee5, 0x1ee5,
-  0x1ee7, 0x1ee7,
-  0x1ee9, 0x1ee9,
-  0x1eeb, 0x1eeb,
-  0x1eed, 0x1eed,
-  0x1eef, 0x1eef,
-  0x1ef1, 0x1ef1,
-  0x1ef3, 0x1ef3,
-  0x1ef5, 0x1ef5,
-  0x1ef7, 0x1ef7,
-  0x1ef9, 0x1ef9,
-  0x1f00, 0x1f07,
-  0x1f10, 0x1f15,
-  0x1f20, 0x1f27,
-  0x1f30, 0x1f37,
-  0x1f40, 0x1f45,
-  0x1f50, 0x1f57,
-  0x1f60, 0x1f67,
-  0x1f70, 0x1f7d,
-  0x1f80, 0x1f87,
-  0x1f90, 0x1f97,
-  0x1fa0, 0x1fa7,
-  0x1fb0, 0x1fb4,
-  0x1fb6, 0x1fb7,
-  0x1fbe, 0x1fbe,
-  0x1fc2, 0x1fc4,
-  0x1fc6, 0x1fc7,
-  0x1fd0, 0x1fd3,
-  0x1fd6, 0x1fd7,
-  0x1fe0, 0x1fe7,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ff7,
-  0x2071, 0x2071,
-  0x207f, 0x207f,
-  0x210a, 0x210a,
-  0x210e, 0x210f,
-  0x2113, 0x2113,
-  0x212f, 0x212f,
-  0x2134, 0x2134,
-  0x2139, 0x2139,
-  0x213d, 0x213d,
-  0x2146, 0x2149,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xff41, 0xff5a,
-  0x10428, 0x1044f,
-  0x1d41a, 0x1d433,
-  0x1d44e, 0x1d454,
-  0x1d456, 0x1d467,
-  0x1d482, 0x1d49b,
-  0x1d4b6, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d4cf,
-  0x1d4ea, 0x1d503,
-  0x1d51e, 0x1d537,
-  0x1d552, 0x1d56b,
-  0x1d586, 0x1d59f,
-  0x1d5ba, 0x1d5d3,
-  0x1d5ee, 0x1d607,
-  0x1d622, 0x1d63b,
-  0x1d656, 0x1d66f,
-  0x1d68a, 0x1d6a3,
-  0x1d6c2, 0x1d6da,
-  0x1d6dc, 0x1d6e1,
-  0x1d6fc, 0x1d714,
-  0x1d716, 0x1d71b,
-  0x1d736, 0x1d74e,
-  0x1d750, 0x1d755,
-  0x1d770, 0x1d788,
-  0x1d78a, 0x1d78f,
-  0x1d7aa, 0x1d7c2,
-  0x1d7c4, 0x1d7c9
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBLower */
-
-static const OnigCodePoint SBPrint[] = {
-  2,
-  0x0009, 0x000d,
-  0x0020, 0x007e
-};
-
-static const OnigCodePoint MBPrint[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  403,
-#else
-  2,
 #endif
-  0x0085, 0x0085,
-  0x00a0, 0x0236
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0250, 0x0357,
-  0x035d, 0x036f,
-  0x0374, 0x0375,
-  0x037a, 0x037a,
-  0x037e, 0x037e,
-  0x0384, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x03a1,
-  0x03a3, 0x03ce,
-  0x03d0, 0x03fb,
-  0x0400, 0x0486,
-  0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
-  0x0500, 0x050f,
-  0x0531, 0x0556,
-  0x0559, 0x055f,
-  0x0561, 0x0587,
-  0x0589, 0x058a,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05c4,
-  0x05d0, 0x05ea,
-  0x05f0, 0x05f4,
-  0x0600, 0x0603,
-  0x060c, 0x0615,
-  0x061b, 0x061b,
-  0x061f, 0x061f,
-  0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x0660, 0x070d,
-  0x070f, 0x074a,
-  0x074d, 0x074f,
-  0x0780, 0x07b1,
-  0x0901, 0x0939,
-  0x093c, 0x094d,
-  0x0950, 0x0954,
-  0x0958, 0x0970,
-  0x0981, 0x0983,
-  0x0985, 0x098c,
-  0x098f, 0x0990,
-  0x0993, 0x09a8,
-  0x09aa, 0x09b0,
-  0x09b2, 0x09b2,
-  0x09b6, 0x09b9,
-  0x09bc, 0x09c4,
-  0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
-  0x09d7, 0x09d7,
-  0x09dc, 0x09dd,
-  0x09df, 0x09e3,
-  0x09e6, 0x09fa,
-  0x0a01, 0x0a03,
-  0x0a05, 0x0a0a,
-  0x0a0f, 0x0a10,
-  0x0a13, 0x0a28,
-  0x0a2a, 0x0a30,
-  0x0a32, 0x0a33,
-  0x0a35, 0x0a36,
-  0x0a38, 0x0a39,
-  0x0a3c, 0x0a3c,
-  0x0a3e, 0x0a42,
-  0x0a47, 0x0a48,
-  0x0a4b, 0x0a4d,
-  0x0a59, 0x0a5c,
-  0x0a5e, 0x0a5e,
-  0x0a66, 0x0a74,
-  0x0a81, 0x0a83,
-  0x0a85, 0x0a8d,
-  0x0a8f, 0x0a91,
-  0x0a93, 0x0aa8,
-  0x0aaa, 0x0ab0,
-  0x0ab2, 0x0ab3,
-  0x0ab5, 0x0ab9,
-  0x0abc, 0x0ac5,
-  0x0ac7, 0x0ac9,
-  0x0acb, 0x0acd,
-  0x0ad0, 0x0ad0,
-  0x0ae0, 0x0ae3,
-  0x0ae6, 0x0aef,
-  0x0af1, 0x0af1,
-  0x0b01, 0x0b03,
-  0x0b05, 0x0b0c,
-  0x0b0f, 0x0b10,
-  0x0b13, 0x0b28,
-  0x0b2a, 0x0b30,
-  0x0b32, 0x0b33,
-  0x0b35, 0x0b39,
-  0x0b3c, 0x0b43,
-  0x0b47, 0x0b48,
-  0x0b4b, 0x0b4d,
-  0x0b56, 0x0b57,
-  0x0b5c, 0x0b5d,
-  0x0b5f, 0x0b61,
-  0x0b66, 0x0b71,
-  0x0b82, 0x0b83,
-  0x0b85, 0x0b8a,
-  0x0b8e, 0x0b90,
-  0x0b92, 0x0b95,
-  0x0b99, 0x0b9a,
-  0x0b9c, 0x0b9c,
-  0x0b9e, 0x0b9f,
-  0x0ba3, 0x0ba4,
-  0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
-  0x0bbe, 0x0bc2,
-  0x0bc6, 0x0bc8,
-  0x0bca, 0x0bcd,
-  0x0bd7, 0x0bd7,
-  0x0be7, 0x0bfa,
-  0x0c01, 0x0c03,
-  0x0c05, 0x0c0c,
-  0x0c0e, 0x0c10,
-  0x0c12, 0x0c28,
-  0x0c2a, 0x0c33,
-  0x0c35, 0x0c39,
-  0x0c3e, 0x0c44,
-  0x0c46, 0x0c48,
-  0x0c4a, 0x0c4d,
-  0x0c55, 0x0c56,
-  0x0c60, 0x0c61,
-  0x0c66, 0x0c6f,
-  0x0c82, 0x0c83,
-  0x0c85, 0x0c8c,
-  0x0c8e, 0x0c90,
-  0x0c92, 0x0ca8,
-  0x0caa, 0x0cb3,
-  0x0cb5, 0x0cb9,
-  0x0cbc, 0x0cc4,
-  0x0cc6, 0x0cc8,
-  0x0cca, 0x0ccd,
-  0x0cd5, 0x0cd6,
-  0x0cde, 0x0cde,
-  0x0ce0, 0x0ce1,
-  0x0ce6, 0x0cef,
-  0x0d02, 0x0d03,
-  0x0d05, 0x0d0c,
-  0x0d0e, 0x0d10,
-  0x0d12, 0x0d28,
-  0x0d2a, 0x0d39,
-  0x0d3e, 0x0d43,
-  0x0d46, 0x0d48,
-  0x0d4a, 0x0d4d,
-  0x0d57, 0x0d57,
-  0x0d60, 0x0d61,
-  0x0d66, 0x0d6f,
-  0x0d82, 0x0d83,
-  0x0d85, 0x0d96,
-  0x0d9a, 0x0db1,
-  0x0db3, 0x0dbb,
-  0x0dbd, 0x0dbd,
-  0x0dc0, 0x0dc6,
-  0x0dca, 0x0dca,
-  0x0dcf, 0x0dd4,
-  0x0dd6, 0x0dd6,
-  0x0dd8, 0x0ddf,
-  0x0df2, 0x0df4,
-  0x0e01, 0x0e3a,
-  0x0e3f, 0x0e5b,
-  0x0e81, 0x0e82,
-  0x0e84, 0x0e84,
-  0x0e87, 0x0e88,
-  0x0e8a, 0x0e8a,
-  0x0e8d, 0x0e8d,
-  0x0e94, 0x0e97,
-  0x0e99, 0x0e9f,
-  0x0ea1, 0x0ea3,
-  0x0ea5, 0x0ea5,
-  0x0ea7, 0x0ea7,
-  0x0eaa, 0x0eab,
-  0x0ead, 0x0eb9,
-  0x0ebb, 0x0ebd,
-  0x0ec0, 0x0ec4,
-  0x0ec6, 0x0ec6,
-  0x0ec8, 0x0ecd,
-  0x0ed0, 0x0ed9,
-  0x0edc, 0x0edd,
-  0x0f00, 0x0f47,
-  0x0f49, 0x0f6a,
-  0x0f71, 0x0f8b,
-  0x0f90, 0x0f97,
-  0x0f99, 0x0fbc,
-  0x0fbe, 0x0fcc,
-  0x0fcf, 0x0fcf,
-  0x1000, 0x1021,
-  0x1023, 0x1027,
-  0x1029, 0x102a,
-  0x102c, 0x1032,
-  0x1036, 0x1039,
-  0x1040, 0x1059,
-  0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x10fb, 0x10fb,
-  0x1100, 0x1159,
-  0x115f, 0x11a2,
-  0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
-  0x124a, 0x124d,
-  0x1250, 0x1256,
-  0x1258, 0x1258,
-  0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
-  0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
-  0x12b2, 0x12b5,
-  0x12b8, 0x12be,
-  0x12c0, 0x12c0,
-  0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
-  0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1361, 0x137c,
-  0x13a0, 0x13f4,
-  0x1401, 0x1676,
-  0x1680, 0x169c,
-  0x16a0, 0x16f0,
-  0x1700, 0x170c,
-  0x170e, 0x1714,
-  0x1720, 0x1736,
-  0x1740, 0x1753,
-  0x1760, 0x176c,
-  0x176e, 0x1770,
-  0x1772, 0x1773,
-  0x1780, 0x17dd,
-  0x17e0, 0x17e9,
-  0x17f0, 0x17f9,
-  0x1800, 0x180e,
-  0x1810, 0x1819,
-  0x1820, 0x1877,
-  0x1880, 0x18a9,
-  0x1900, 0x191c,
-  0x1920, 0x192b,
-  0x1930, 0x193b,
-  0x1940, 0x1940,
-  0x1944, 0x196d,
-  0x1970, 0x1974,
-  0x19e0, 0x19ff,
-  0x1d00, 0x1d6b,
-  0x1e00, 0x1e9b,
-  0x1ea0, 0x1ef9,
-  0x1f00, 0x1f15,
-  0x1f18, 0x1f1d,
-  0x1f20, 0x1f45,
-  0x1f48, 0x1f4d,
-  0x1f50, 0x1f57,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f7d,
-  0x1f80, 0x1fb4,
-  0x1fb6, 0x1fc4,
-  0x1fc6, 0x1fd3,
-  0x1fd6, 0x1fdb,
-  0x1fdd, 0x1fef,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffe,
-  0x2000, 0x2054,
-  0x2057, 0x2057,
-  0x205f, 0x2063,
-  0x206a, 0x2071,
-  0x2074, 0x208e,
-  0x20a0, 0x20b1,
-  0x20d0, 0x20ea,
-  0x2100, 0x213b,
-  0x213d, 0x214b,
-  0x2153, 0x2183,
-  0x2190, 0x23d0,
-  0x2400, 0x2426,
-  0x2440, 0x244a,
-  0x2460, 0x2617,
-  0x2619, 0x267d,
-  0x2680, 0x2691,
-  0x26a0, 0x26a1,
-  0x2701, 0x2704,
-  0x2706, 0x2709,
-  0x270c, 0x2727,
-  0x2729, 0x274b,
-  0x274d, 0x274d,
-  0x274f, 0x2752,
-  0x2756, 0x2756,
-  0x2758, 0x275e,
-  0x2761, 0x2794,
-  0x2798, 0x27af,
-  0x27b1, 0x27be,
-  0x27d0, 0x27eb,
-  0x27f0, 0x2b0d,
-  0x2e80, 0x2e99,
-  0x2e9b, 0x2ef3,
-  0x2f00, 0x2fd5,
-  0x2ff0, 0x2ffb,
-  0x3000, 0x303f,
-  0x3041, 0x3096,
-  0x3099, 0x30ff,
-  0x3105, 0x312c,
-  0x3131, 0x318e,
-  0x3190, 0x31b7,
-  0x31f0, 0x321e,
-  0x3220, 0x3243,
-  0x3250, 0x327d,
-  0x327f, 0x32fe,
-  0x3300, 0x4db5,
-  0x4dc0, 0x9fa5,
-  0xa000, 0xa48c,
-  0xa490, 0xa4c6,
-  0xac00, 0xd7a3,
-  0xe000, 0xfa2d,
-  0xfa30, 0xfa6a,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xfb1d, 0xfb36,
-  0xfb38, 0xfb3c,
-  0xfb3e, 0xfb3e,
-  0xfb40, 0xfb41,
-  0xfb43, 0xfb44,
-  0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3f,
-  0xfd50, 0xfd8f,
-  0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfd,
-  0xfe00, 0xfe0f,
-  0xfe20, 0xfe23,
-  0xfe30, 0xfe52,
-  0xfe54, 0xfe66,
-  0xfe68, 0xfe6b,
-  0xfe70, 0xfe74,
-  0xfe76, 0xfefc,
-  0xfeff, 0xfeff,
-  0xff01, 0xffbe,
-  0xffc2, 0xffc7,
-  0xffca, 0xffcf,
-  0xffd2, 0xffd7,
-  0xffda, 0xffdc,
-  0xffe0, 0xffe6,
-  0xffe8, 0xffee,
-  0xfff9, 0xfffd,
-  0x10000, 0x1000b,
-  0x1000d, 0x10026,
-  0x10028, 0x1003a,
-  0x1003c, 0x1003d,
-  0x1003f, 0x1004d,
-  0x10050, 0x1005d,
-  0x10080, 0x100fa,
-  0x10100, 0x10102,
-  0x10107, 0x10133,
-  0x10137, 0x1013f,
-  0x10300, 0x1031e,
-  0x10320, 0x10323,
-  0x10330, 0x1034a,
-  0x10380, 0x1039d,
-  0x1039f, 0x1039f,
-  0x10400, 0x1049d,
-  0x104a0, 0x104a9,
-  0x10800, 0x10805,
-  0x10808, 0x10808,
-  0x1080a, 0x10835,
-  0x10837, 0x10838,
-  0x1083c, 0x1083c,
-  0x1083f, 0x1083f,
-  0x1d000, 0x1d0f5,
-  0x1d100, 0x1d126,
-  0x1d12a, 0x1d1dd,
-  0x1d300, 0x1d356,
-  0x1d400, 0x1d454,
-  0x1d456, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d51e, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d7c9,
-  0x1d7ce, 0x1d7ff,
-  0x20000, 0x2a6d6,
-  0x2f800, 0x2fa1d,
-  0xe0001, 0xe0001,
-  0xe0020, 0xe007f,
-  0xe0100, 0xe01ef,
-  0xf0000, 0xffffd,
-  0x100000, 0x10fffd
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBPrint */
-
-static const OnigCodePoint SBPunct[] = {
-  9,
-  0x0021, 0x0023,
-  0x0025, 0x002a,
-  0x002c, 0x002f,
-  0x003a, 0x003b,
-  0x003f, 0x0040,
-  0x005b, 0x005d,
-  0x005f, 0x005f,
-  0x007b, 0x007b,
-  0x007d, 0x007d
-}; /* end of SBPunct */
-
-static const OnigCodePoint MBPunct[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  77,
-#else
-  5,
-#endif
-  0x00a1, 0x00a1,
-  0x00ab, 0x00ab,
-  0x00b7, 0x00b7,
-  0x00bb, 0x00bb,
-  0x00bf, 0x00bf
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x037e, 0x037e,
-  0x0387, 0x0387,
-  0x055a, 0x055f,
-  0x0589, 0x058a,
-  0x05be, 0x05be,
-  0x05c0, 0x05c0,
-  0x05c3, 0x05c3,
-  0x05f3, 0x05f4,
-  0x060c, 0x060d,
-  0x061b, 0x061b,
-  0x061f, 0x061f,
-  0x066a, 0x066d,
-  0x06d4, 0x06d4,
-  0x0700, 0x070d,
-  0x0964, 0x0965,
-  0x0970, 0x0970,
-  0x0df4, 0x0df4,
-  0x0e4f, 0x0e4f,
-  0x0e5a, 0x0e5b,
-  0x0f04, 0x0f12,
-  0x0f3a, 0x0f3d,
-  0x0f85, 0x0f85,
-  0x104a, 0x104f,
-  0x10fb, 0x10fb,
-  0x1361, 0x1368,
-  0x166d, 0x166e,
-  0x169b, 0x169c,
-  0x16eb, 0x16ed,
-  0x1735, 0x1736,
-  0x17d4, 0x17d6,
-  0x17d8, 0x17da,
-  0x1800, 0x180a,
-  0x1944, 0x1945,
-  0x2010, 0x2027,
-  0x2030, 0x2043,
-  0x2045, 0x2051,
-  0x2053, 0x2054,
-  0x2057, 0x2057,
-  0x207d, 0x207e,
-  0x208d, 0x208e,
-  0x2329, 0x232a,
-  0x23b4, 0x23b6,
-  0x2768, 0x2775,
-  0x27e6, 0x27eb,
-  0x2983, 0x2998,
-  0x29d8, 0x29db,
-  0x29fc, 0x29fd,
-  0x3001, 0x3003,
-  0x3008, 0x3011,
-  0x3014, 0x301f,
-  0x3030, 0x3030,
-  0x303d, 0x303d,
-  0x30a0, 0x30a0,
-  0x30fb, 0x30fb,
-  0xfd3e, 0xfd3f,
-  0xfe30, 0xfe52,
-  0xfe54, 0xfe61,
-  0xfe63, 0xfe63,
-  0xfe68, 0xfe68,
-  0xfe6a, 0xfe6b,
-  0xff01, 0xff03,
-  0xff05, 0xff0a,
-  0xff0c, 0xff0f,
-  0xff1a, 0xff1b,
-  0xff1f, 0xff20,
-  0xff3b, 0xff3d,
-  0xff3f, 0xff3f,
-  0xff5b, 0xff5b,
-  0xff5d, 0xff5d,
-  0xff5f, 0xff65,
-  0x10100, 0x10101,
-  0x1039f, 0x1039f
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBPunct */
-
-static const OnigCodePoint SBSpace[] = {
-  2,
-  0x0009, 0x000d,
-  0x0020, 0x0020
-};
-
-static const OnigCodePoint MBSpace[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  9,
-#else
-  2,
-#endif
-  0x0085, 0x0085,
-  0x00a0, 0x00a0
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x1680, 0x1680,
-  0x180e, 0x180e,
-  0x2000, 0x200a,
-  0x2028, 0x2029,
-  0x202f, 0x202f,
-  0x205f, 0x205f,
-  0x3000, 0x3000
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBSpace */
-
-static const OnigCodePoint SBUpper[] = {
-  1,
-  0x0041, 0x005a
-};
-
-static const OnigCodePoint MBUpper[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  420,
-#else
-  2,
-#endif
-  0x00c0, 0x00d6,
-  0x00d8, 0x00de
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  ,
-  0x0100, 0x0100,
-  0x0102, 0x0102,
-  0x0104, 0x0104,
-  0x0106, 0x0106,
-  0x0108, 0x0108,
-  0x010a, 0x010a,
-  0x010c, 0x010c,
-  0x010e, 0x010e,
-  0x0110, 0x0110,
-  0x0112, 0x0112,
-  0x0114, 0x0114,
-  0x0116, 0x0116,
-  0x0118, 0x0118,
-  0x011a, 0x011a,
-  0x011c, 0x011c,
-  0x011e, 0x011e,
-  0x0120, 0x0120,
-  0x0122, 0x0122,
-  0x0124, 0x0124,
-  0x0126, 0x0126,
-  0x0128, 0x0128,
-  0x012a, 0x012a,
-  0x012c, 0x012c,
-  0x012e, 0x012e,
-  0x0130, 0x0130,
-  0x0132, 0x0132,
-  0x0134, 0x0134,
-  0x0136, 0x0136,
-  0x0139, 0x0139,
-  0x013b, 0x013b,
-  0x013d, 0x013d,
-  0x013f, 0x013f,
-  0x0141, 0x0141,
-  0x0143, 0x0143,
-  0x0145, 0x0145,
-  0x0147, 0x0147,
-  0x014a, 0x014a,
-  0x014c, 0x014c,
-  0x014e, 0x014e,
-  0x0150, 0x0150,
-  0x0152, 0x0152,
-  0x0154, 0x0154,
-  0x0156, 0x0156,
-  0x0158, 0x0158,
-  0x015a, 0x015a,
-  0x015c, 0x015c,
-  0x015e, 0x015e,
-  0x0160, 0x0160,
-  0x0162, 0x0162,
-  0x0164, 0x0164,
-  0x0166, 0x0166,
-  0x0168, 0x0168,
-  0x016a, 0x016a,
-  0x016c, 0x016c,
-  0x016e, 0x016e,
-  0x0170, 0x0170,
-  0x0172, 0x0172,
-  0x0174, 0x0174,
-  0x0176, 0x0176,
-  0x0178, 0x0179,
-  0x017b, 0x017b,
-  0x017d, 0x017d,
-  0x0181, 0x0182,
-  0x0184, 0x0184,
-  0x0186, 0x0187,
-  0x0189, 0x018b,
-  0x018e, 0x0191,
-  0x0193, 0x0194,
-  0x0196, 0x0198,
-  0x019c, 0x019d,
-  0x019f, 0x01a0,
-  0x01a2, 0x01a2,
-  0x01a4, 0x01a4,
-  0x01a6, 0x01a7,
-  0x01a9, 0x01a9,
-  0x01ac, 0x01ac,
-  0x01ae, 0x01af,
-  0x01b1, 0x01b3,
-  0x01b5, 0x01b5,
-  0x01b7, 0x01b8,
-  0x01bc, 0x01bc,
-  0x01c4, 0x01c4,
-  0x01c7, 0x01c7,
-  0x01ca, 0x01ca,
-  0x01cd, 0x01cd,
-  0x01cf, 0x01cf,
-  0x01d1, 0x01d1,
-  0x01d3, 0x01d3,
-  0x01d5, 0x01d5,
-  0x01d7, 0x01d7,
-  0x01d9, 0x01d9,
-  0x01db, 0x01db,
-  0x01de, 0x01de,
-  0x01e0, 0x01e0,
-  0x01e2, 0x01e2,
-  0x01e4, 0x01e4,
-  0x01e6, 0x01e6,
-  0x01e8, 0x01e8,
-  0x01ea, 0x01ea,
-  0x01ec, 0x01ec,
-  0x01ee, 0x01ee,
-  0x01f1, 0x01f1,
-  0x01f4, 0x01f4,
-  0x01f6, 0x01f8,
-  0x01fa, 0x01fa,
-  0x01fc, 0x01fc,
-  0x01fe, 0x01fe,
-  0x0200, 0x0200,
-  0x0202, 0x0202,
-  0x0204, 0x0204,
-  0x0206, 0x0206,
-  0x0208, 0x0208,
-  0x020a, 0x020a,
-  0x020c, 0x020c,
-  0x020e, 0x020e,
-  0x0210, 0x0210,
-  0x0212, 0x0212,
-  0x0214, 0x0214,
-  0x0216, 0x0216,
-  0x0218, 0x0218,
-  0x021a, 0x021a,
-  0x021c, 0x021c,
-  0x021e, 0x021e,
-  0x0220, 0x0220,
-  0x0222, 0x0222,
-  0x0224, 0x0224,
-  0x0226, 0x0226,
-  0x0228, 0x0228,
-  0x022a, 0x022a,
-  0x022c, 0x022c,
-  0x022e, 0x022e,
-  0x0230, 0x0230,
-  0x0232, 0x0232,
-  0x0386, 0x0386,
-  0x0388, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x038f,
-  0x0391, 0x03a1,
-  0x03a3, 0x03ab,
-  0x03d2, 0x03d4,
-  0x03d8, 0x03d8,
-  0x03da, 0x03da,
-  0x03dc, 0x03dc,
-  0x03de, 0x03de,
-  0x03e0, 0x03e0,
-  0x03e2, 0x03e2,
-  0x03e4, 0x03e4,
-  0x03e6, 0x03e6,
-  0x03e8, 0x03e8,
-  0x03ea, 0x03ea,
-  0x03ec, 0x03ec,
-  0x03ee, 0x03ee,
-  0x03f4, 0x03f4,
-  0x03f7, 0x03f7,
-  0x03f9, 0x03fa,
-  0x0400, 0x042f,
-  0x0460, 0x0460,
-  0x0462, 0x0462,
-  0x0464, 0x0464,
-  0x0466, 0x0466,
-  0x0468, 0x0468,
-  0x046a, 0x046a,
-  0x046c, 0x046c,
-  0x046e, 0x046e,
-  0x0470, 0x0470,
-  0x0472, 0x0472,
-  0x0474, 0x0474,
-  0x0476, 0x0476,
-  0x0478, 0x0478,
-  0x047a, 0x047a,
-  0x047c, 0x047c,
-  0x047e, 0x047e,
-  0x0480, 0x0480,
-  0x048a, 0x048a,
-  0x048c, 0x048c,
-  0x048e, 0x048e,
-  0x0490, 0x0490,
-  0x0492, 0x0492,
-  0x0494, 0x0494,
-  0x0496, 0x0496,
-  0x0498, 0x0498,
-  0x049a, 0x049a,
-  0x049c, 0x049c,
-  0x049e, 0x049e,
-  0x04a0, 0x04a0,
-  0x04a2, 0x04a2,
-  0x04a4, 0x04a4,
-  0x04a6, 0x04a6,
-  0x04a8, 0x04a8,
-  0x04aa, 0x04aa,
-  0x04ac, 0x04ac,
-  0x04ae, 0x04ae,
-  0x04b0, 0x04b0,
-  0x04b2, 0x04b2,
-  0x04b4, 0x04b4,
-  0x04b6, 0x04b6,
-  0x04b8, 0x04b8,
-  0x04ba, 0x04ba,
-  0x04bc, 0x04bc,
-  0x04be, 0x04be,
-  0x04c0, 0x04c1,
-  0x04c3, 0x04c3,
-  0x04c5, 0x04c5,
-  0x04c7, 0x04c7,
-  0x04c9, 0x04c9,
-  0x04cb, 0x04cb,
-  0x04cd, 0x04cd,
-  0x04d0, 0x04d0,
-  0x04d2, 0x04d2,
-  0x04d4, 0x04d4,
-  0x04d6, 0x04d6,
-  0x04d8, 0x04d8,
-  0x04da, 0x04da,
-  0x04dc, 0x04dc,
-  0x04de, 0x04de,
-  0x04e0, 0x04e0,
-  0x04e2, 0x04e2,
-  0x04e4, 0x04e4,
-  0x04e6, 0x04e6,
-  0x04e8, 0x04e8,
-  0x04ea, 0x04ea,
-  0x04ec, 0x04ec,
-  0x04ee, 0x04ee,
-  0x04f0, 0x04f0,
-  0x04f2, 0x04f2,
-  0x04f4, 0x04f4,
-  0x04f8, 0x04f8,
-  0x0500, 0x0500,
-  0x0502, 0x0502,
-  0x0504, 0x0504,
-  0x0506, 0x0506,
-  0x0508, 0x0508,
-  0x050a, 0x050a,
-  0x050c, 0x050c,
-  0x050e, 0x050e,
-  0x0531, 0x0556,
-  0x10a0, 0x10c5,
-  0x1e00, 0x1e00,
-  0x1e02, 0x1e02,
-  0x1e04, 0x1e04,
-  0x1e06, 0x1e06,
-  0x1e08, 0x1e08,
-  0x1e0a, 0x1e0a,
-  0x1e0c, 0x1e0c,
-  0x1e0e, 0x1e0e,
-  0x1e10, 0x1e10,
-  0x1e12, 0x1e12,
-  0x1e14, 0x1e14,
-  0x1e16, 0x1e16,
-  0x1e18, 0x1e18,
-  0x1e1a, 0x1e1a,
-  0x1e1c, 0x1e1c,
-  0x1e1e, 0x1e1e,
-  0x1e20, 0x1e20,
-  0x1e22, 0x1e22,
-  0x1e24, 0x1e24,
-  0x1e26, 0x1e26,
-  0x1e28, 0x1e28,
-  0x1e2a, 0x1e2a,
-  0x1e2c, 0x1e2c,
-  0x1e2e, 0x1e2e,
-  0x1e30, 0x1e30,
-  0x1e32, 0x1e32,
-  0x1e34, 0x1e34,
-  0x1e36, 0x1e36,
-  0x1e38, 0x1e38,
-  0x1e3a, 0x1e3a,
-  0x1e3c, 0x1e3c,
-  0x1e3e, 0x1e3e,
-  0x1e40, 0x1e40,
-  0x1e42, 0x1e42,
-  0x1e44, 0x1e44,
-  0x1e46, 0x1e46,
-  0x1e48, 0x1e48,
-  0x1e4a, 0x1e4a,
-  0x1e4c, 0x1e4c,
-  0x1e4e, 0x1e4e,
-  0x1e50, 0x1e50,
-  0x1e52, 0x1e52,
-  0x1e54, 0x1e54,
-  0x1e56, 0x1e56,
-  0x1e58, 0x1e58,
-  0x1e5a, 0x1e5a,
-  0x1e5c, 0x1e5c,
-  0x1e5e, 0x1e5e,
-  0x1e60, 0x1e60,
-  0x1e62, 0x1e62,
-  0x1e64, 0x1e64,
-  0x1e66, 0x1e66,
-  0x1e68, 0x1e68,
-  0x1e6a, 0x1e6a,
-  0x1e6c, 0x1e6c,
-  0x1e6e, 0x1e6e,
-  0x1e70, 0x1e70,
-  0x1e72, 0x1e72,
-  0x1e74, 0x1e74,
-  0x1e76, 0x1e76,
-  0x1e78, 0x1e78,
-  0x1e7a, 0x1e7a,
-  0x1e7c, 0x1e7c,
-  0x1e7e, 0x1e7e,
-  0x1e80, 0x1e80,
-  0x1e82, 0x1e82,
-  0x1e84, 0x1e84,
-  0x1e86, 0x1e86,
-  0x1e88, 0x1e88,
-  0x1e8a, 0x1e8a,
-  0x1e8c, 0x1e8c,
-  0x1e8e, 0x1e8e,
-  0x1e90, 0x1e90,
-  0x1e92, 0x1e92,
-  0x1e94, 0x1e94,
-  0x1ea0, 0x1ea0,
-  0x1ea2, 0x1ea2,
-  0x1ea4, 0x1ea4,
-  0x1ea6, 0x1ea6,
-  0x1ea8, 0x1ea8,
-  0x1eaa, 0x1eaa,
-  0x1eac, 0x1eac,
-  0x1eae, 0x1eae,
-  0x1eb0, 0x1eb0,
-  0x1eb2, 0x1eb2,
-  0x1eb4, 0x1eb4,
-  0x1eb6, 0x1eb6,
-  0x1eb8, 0x1eb8,
-  0x1eba, 0x1eba,
-  0x1ebc, 0x1ebc,
-  0x1ebe, 0x1ebe,
-  0x1ec0, 0x1ec0,
-  0x1ec2, 0x1ec2,
-  0x1ec4, 0x1ec4,
-  0x1ec6, 0x1ec6,
-  0x1ec8, 0x1ec8,
-  0x1eca, 0x1eca,
-  0x1ecc, 0x1ecc,
-  0x1ece, 0x1ece,
-  0x1ed0, 0x1ed0,
-  0x1ed2, 0x1ed2,
-  0x1ed4, 0x1ed4,
-  0x1ed6, 0x1ed6,
-  0x1ed8, 0x1ed8,
-  0x1eda, 0x1eda,
-  0x1edc, 0x1edc,
-  0x1ede, 0x1ede,
-  0x1ee0, 0x1ee0,
-  0x1ee2, 0x1ee2,
-  0x1ee4, 0x1ee4,
-  0x1ee6, 0x1ee6,
-  0x1ee8, 0x1ee8,
-  0x1eea, 0x1eea,
-  0x1eec, 0x1eec,
-  0x1eee, 0x1eee,
-  0x1ef0, 0x1ef0,
-  0x1ef2, 0x1ef2,
-  0x1ef4, 0x1ef4,
-  0x1ef6, 0x1ef6,
-  0x1ef8, 0x1ef8,
-  0x1f08, 0x1f0f,
-  0x1f18, 0x1f1d,
-  0x1f28, 0x1f2f,
-  0x1f38, 0x1f3f,
-  0x1f48, 0x1f4d,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f5f,
-  0x1f68, 0x1f6f,
-  0x1fb8, 0x1fbb,
-  0x1fc8, 0x1fcb,
-  0x1fd8, 0x1fdb,
-  0x1fe8, 0x1fec,
-  0x1ff8, 0x1ffb,
-  0x2102, 0x2102,
-  0x2107, 0x2107,
-  0x210b, 0x210d,
-  0x2110, 0x2112,
-  0x2115, 0x2115,
-  0x2119, 0x211d,
-  0x2124, 0x2124,
-  0x2126, 0x2126,
-  0x2128, 0x2128,
-  0x212a, 0x212d,
-  0x2130, 0x2131,
-  0x2133, 0x2133,
-  0x213e, 0x213f,
-  0x2145, 0x2145,
-  0xff21, 0xff3a,
-  0x10400, 0x10427,
-  0x1d400, 0x1d419,
-  0x1d434, 0x1d44d,
-  0x1d468, 0x1d481,
-  0x1d49c, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b5,
-  0x1d4d0, 0x1d4e9,
-  0x1d504, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d538, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d56c, 0x1d585,
-  0x1d5a0, 0x1d5b9,
-  0x1d5d4, 0x1d5ed,
-  0x1d608, 0x1d621,
-  0x1d63c, 0x1d655,
-  0x1d670, 0x1d689,
-  0x1d6a8, 0x1d6c0,
-  0x1d6e2, 0x1d6fa,
-  0x1d71c, 0x1d734,
-  0x1d756, 0x1d76e,
-  0x1d790, 0x1d7a8
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBUpper */
-
-static const OnigCodePoint SBXDigit[] = {
-  3,
-  0x0030, 0x0039,
-  0x0041, 0x0046,
-  0x0061, 0x0066
-};
-
-static const OnigCodePoint SBASCII[] = {
-  1,
-  0x0000, 0x007f
-};
-
-static const OnigCodePoint SBWord[] = {
-  4,
-  0x0030, 0x0039,
-  0x0041, 0x005a,
-  0x005f, 0x005f,
-  0x0061, 0x007a
-};
-
-static const OnigCodePoint MBWord[] = {
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  432,
-#else
-  8,
-#endif
-  0x00aa, 0x00aa,
-  0x00b2, 0x00b3,
-  0x00b5, 0x00b5,
-  0x00b9, 0x00ba,
-  0x00bc, 0x00be,
-  0x00c0, 0x00d6,
-  0x00d8, 0x00f6,
-#ifndef USE_UNICODE_FULL_RANGE_CTYPE
-  0x00f8, 0x7fffffff
-#else /* not USE_UNICODE_FULL_RANGE_CTYPE */
-  0x00f8, 0x0236,
-  0x0250, 0x02c1,
-  0x02c6, 0x02d1,
-  0x02e0, 0x02e4,
-  0x02ee, 0x02ee,
-  0x0300, 0x0357,
-  0x035d, 0x036f,
-  0x037a, 0x037a,
-  0x0386, 0x0386,
-  0x0388, 0x038a,
-  0x038c, 0x038c,
-  0x038e, 0x03a1,
-  0x03a3, 0x03ce,
-  0x03d0, 0x03f5,
-  0x03f7, 0x03fb,
-  0x0400, 0x0481,
-  0x0483, 0x0486,
-  0x0488, 0x04ce,
-  0x04d0, 0x04f5,
-  0x04f8, 0x04f9,
-  0x0500, 0x050f,
-  0x0531, 0x0556,
-  0x0559, 0x0559,
-  0x0561, 0x0587,
-  0x0591, 0x05a1,
-  0x05a3, 0x05b9,
-  0x05bb, 0x05bd,
-  0x05bf, 0x05bf,
-  0x05c1, 0x05c2,
-  0x05c4, 0x05c4,
-  0x05d0, 0x05ea,
-  0x05f0, 0x05f2,
-  0x0610, 0x0615,
-  0x0621, 0x063a,
-  0x0640, 0x0658,
-  0x0660, 0x0669,
-  0x066e, 0x06d3,
-  0x06d5, 0x06dc,
-  0x06de, 0x06e8,
-  0x06ea, 0x06fc,
-  0x06ff, 0x06ff,
-  0x0710, 0x074a,
-  0x074d, 0x074f,
-  0x0780, 0x07b1,
-  0x0901, 0x0939,
-  0x093c, 0x094d,
-  0x0950, 0x0954,
-  0x0958, 0x0963,
-  0x0966, 0x096f,
-  0x0981, 0x0983,
-  0x0985, 0x098c,
-  0x098f, 0x0990,
-  0x0993, 0x09a8,
-  0x09aa, 0x09b0,
-  0x09b2, 0x09b2,
-  0x09b6, 0x09b9,
-  0x09bc, 0x09c4,
-  0x09c7, 0x09c8,
-  0x09cb, 0x09cd,
-  0x09d7, 0x09d7,
-  0x09dc, 0x09dd,
-  0x09df, 0x09e3,
-  0x09e6, 0x09f1,
-  0x09f4, 0x09f9,
-  0x0a01, 0x0a03,
-  0x0a05, 0x0a0a,
-  0x0a0f, 0x0a10,
-  0x0a13, 0x0a28,
-  0x0a2a, 0x0a30,
-  0x0a32, 0x0a33,
-  0x0a35, 0x0a36,
-  0x0a38, 0x0a39,
-  0x0a3c, 0x0a3c,
-  0x0a3e, 0x0a42,
-  0x0a47, 0x0a48,
-  0x0a4b, 0x0a4d,
-  0x0a59, 0x0a5c,
-  0x0a5e, 0x0a5e,
-  0x0a66, 0x0a74,
-  0x0a81, 0x0a83,
-  0x0a85, 0x0a8d,
-  0x0a8f, 0x0a91,
-  0x0a93, 0x0aa8,
-  0x0aaa, 0x0ab0,
-  0x0ab2, 0x0ab3,
-  0x0ab5, 0x0ab9,
-  0x0abc, 0x0ac5,
-  0x0ac7, 0x0ac9,
-  0x0acb, 0x0acd,
-  0x0ad0, 0x0ad0,
-  0x0ae0, 0x0ae3,
-  0x0ae6, 0x0aef,
-  0x0b01, 0x0b03,
-  0x0b05, 0x0b0c,
-  0x0b0f, 0x0b10,
-  0x0b13, 0x0b28,
-  0x0b2a, 0x0b30,
-  0x0b32, 0x0b33,
-  0x0b35, 0x0b39,
-  0x0b3c, 0x0b43,
-  0x0b47, 0x0b48,
-  0x0b4b, 0x0b4d,
-  0x0b56, 0x0b57,
-  0x0b5c, 0x0b5d,
-  0x0b5f, 0x0b61,
-  0x0b66, 0x0b6f,
-  0x0b71, 0x0b71,
-  0x0b82, 0x0b83,
-  0x0b85, 0x0b8a,
-  0x0b8e, 0x0b90,
-  0x0b92, 0x0b95,
-  0x0b99, 0x0b9a,
-  0x0b9c, 0x0b9c,
-  0x0b9e, 0x0b9f,
-  0x0ba3, 0x0ba4,
-  0x0ba8, 0x0baa,
-  0x0bae, 0x0bb5,
-  0x0bb7, 0x0bb9,
-  0x0bbe, 0x0bc2,
-  0x0bc6, 0x0bc8,
-  0x0bca, 0x0bcd,
-  0x0bd7, 0x0bd7,
-  0x0be7, 0x0bf2,
-  0x0c01, 0x0c03,
-  0x0c05, 0x0c0c,
-  0x0c0e, 0x0c10,
-  0x0c12, 0x0c28,
-  0x0c2a, 0x0c33,
-  0x0c35, 0x0c39,
-  0x0c3e, 0x0c44,
-  0x0c46, 0x0c48,
-  0x0c4a, 0x0c4d,
-  0x0c55, 0x0c56,
-  0x0c60, 0x0c61,
-  0x0c66, 0x0c6f,
-  0x0c82, 0x0c83,
-  0x0c85, 0x0c8c,
-  0x0c8e, 0x0c90,
-  0x0c92, 0x0ca8,
-  0x0caa, 0x0cb3,
-  0x0cb5, 0x0cb9,
-  0x0cbc, 0x0cc4,
-  0x0cc6, 0x0cc8,
-  0x0cca, 0x0ccd,
-  0x0cd5, 0x0cd6,
-  0x0cde, 0x0cde,
-  0x0ce0, 0x0ce1,
-  0x0ce6, 0x0cef,
-  0x0d02, 0x0d03,
-  0x0d05, 0x0d0c,
-  0x0d0e, 0x0d10,
-  0x0d12, 0x0d28,
-  0x0d2a, 0x0d39,
-  0x0d3e, 0x0d43,
-  0x0d46, 0x0d48,
-  0x0d4a, 0x0d4d,
-  0x0d57, 0x0d57,
-  0x0d60, 0x0d61,
-  0x0d66, 0x0d6f,
-  0x0d82, 0x0d83,
-  0x0d85, 0x0d96,
-  0x0d9a, 0x0db1,
-  0x0db3, 0x0dbb,
-  0x0dbd, 0x0dbd,
-  0x0dc0, 0x0dc6,
-  0x0dca, 0x0dca,
-  0x0dcf, 0x0dd4,
-  0x0dd6, 0x0dd6,
-  0x0dd8, 0x0ddf,
-  0x0df2, 0x0df3,
-  0x0e01, 0x0e3a,
-  0x0e40, 0x0e4e,
-  0x0e50, 0x0e59,
-  0x0e81, 0x0e82,
-  0x0e84, 0x0e84,
-  0x0e87, 0x0e88,
-  0x0e8a, 0x0e8a,
-  0x0e8d, 0x0e8d,
-  0x0e94, 0x0e97,
-  0x0e99, 0x0e9f,
-  0x0ea1, 0x0ea3,
-  0x0ea5, 0x0ea5,
-  0x0ea7, 0x0ea7,
-  0x0eaa, 0x0eab,
-  0x0ead, 0x0eb9,
-  0x0ebb, 0x0ebd,
-  0x0ec0, 0x0ec4,
-  0x0ec6, 0x0ec6,
-  0x0ec8, 0x0ecd,
-  0x0ed0, 0x0ed9,
-  0x0edc, 0x0edd,
-  0x0f00, 0x0f00,
-  0x0f18, 0x0f19,
-  0x0f20, 0x0f33,
-  0x0f35, 0x0f35,
-  0x0f37, 0x0f37,
-  0x0f39, 0x0f39,
-  0x0f3e, 0x0f47,
-  0x0f49, 0x0f6a,
-  0x0f71, 0x0f84,
-  0x0f86, 0x0f8b,
-  0x0f90, 0x0f97,
-  0x0f99, 0x0fbc,
-  0x0fc6, 0x0fc6,
-  0x1000, 0x1021,
-  0x1023, 0x1027,
-  0x1029, 0x102a,
-  0x102c, 0x1032,
-  0x1036, 0x1039,
-  0x1040, 0x1049,
-  0x1050, 0x1059,
-  0x10a0, 0x10c5,
-  0x10d0, 0x10f8,
-  0x1100, 0x1159,
-  0x115f, 0x11a2,
-  0x11a8, 0x11f9,
-  0x1200, 0x1206,
-  0x1208, 0x1246,
-  0x1248, 0x1248,
-  0x124a, 0x124d,
-  0x1250, 0x1256,
-  0x1258, 0x1258,
-  0x125a, 0x125d,
-  0x1260, 0x1286,
-  0x1288, 0x1288,
-  0x128a, 0x128d,
-  0x1290, 0x12ae,
-  0x12b0, 0x12b0,
-  0x12b2, 0x12b5,
-  0x12b8, 0x12be,
-  0x12c0, 0x12c0,
-  0x12c2, 0x12c5,
-  0x12c8, 0x12ce,
-  0x12d0, 0x12d6,
-  0x12d8, 0x12ee,
-  0x12f0, 0x130e,
-  0x1310, 0x1310,
-  0x1312, 0x1315,
-  0x1318, 0x131e,
-  0x1320, 0x1346,
-  0x1348, 0x135a,
-  0x1369, 0x137c,
-  0x13a0, 0x13f4,
-  0x1401, 0x166c,
-  0x166f, 0x1676,
-  0x1681, 0x169a,
-  0x16a0, 0x16ea,
-  0x16ee, 0x16f0,
-  0x1700, 0x170c,
-  0x170e, 0x1714,
-  0x1720, 0x1734,
-  0x1740, 0x1753,
-  0x1760, 0x176c,
-  0x176e, 0x1770,
-  0x1772, 0x1773,
-  0x1780, 0x17b3,
-  0x17b6, 0x17d3,
-  0x17d7, 0x17d7,
-  0x17dc, 0x17dd,
-  0x17e0, 0x17e9,
-  0x17f0, 0x17f9,
-  0x180b, 0x180d,
-  0x1810, 0x1819,
-  0x1820, 0x1877,
-  0x1880, 0x18a9,
-  0x1900, 0x191c,
-  0x1920, 0x192b,
-  0x1930, 0x193b,
-  0x1946, 0x196d,
-  0x1970, 0x1974,
-  0x1d00, 0x1d6b,
-  0x1e00, 0x1e9b,
-  0x1ea0, 0x1ef9,
-  0x1f00, 0x1f15,
-  0x1f18, 0x1f1d,
-  0x1f20, 0x1f45,
-  0x1f48, 0x1f4d,
-  0x1f50, 0x1f57,
-  0x1f59, 0x1f59,
-  0x1f5b, 0x1f5b,
-  0x1f5d, 0x1f5d,
-  0x1f5f, 0x1f7d,
-  0x1f80, 0x1fb4,
-  0x1fb6, 0x1fbc,
-  0x1fbe, 0x1fbe,
-  0x1fc2, 0x1fc4,
-  0x1fc6, 0x1fcc,
-  0x1fd0, 0x1fd3,
-  0x1fd6, 0x1fdb,
-  0x1fe0, 0x1fec,
-  0x1ff2, 0x1ff4,
-  0x1ff6, 0x1ffc,
-  0x203f, 0x2040,
-  0x2054, 0x2054,
-  0x2070, 0x2071,
-  0x2074, 0x2079,
-  0x207f, 0x2089,
-  0x20d0, 0x20ea,
-  0x2102, 0x2102,
-  0x2107, 0x2107,
-  0x210a, 0x2113,
-  0x2115, 0x2115,
-  0x2119, 0x211d,
-  0x2124, 0x2124,
-  0x2126, 0x2126,
-  0x2128, 0x2128,
-  0x212a, 0x212d,
-  0x212f, 0x2131,
-  0x2133, 0x2139,
-  0x213d, 0x213f,
-  0x2145, 0x2149,
-  0x2153, 0x2183,
-  0x2460, 0x249b,
-  0x24ea, 0x24ff,
-  0x2776, 0x2793,
-  0x3005, 0x3007,
-  0x3021, 0x302f,
-  0x3031, 0x3035,
-  0x3038, 0x303c,
-  0x3041, 0x3096,
-  0x3099, 0x309a,
-  0x309d, 0x309f,
-  0x30a1, 0x30ff,
-  0x3105, 0x312c,
-  0x3131, 0x318e,
-  0x3192, 0x3195,
-  0x31a0, 0x31b7,
-  0x31f0, 0x31ff,
-  0x3220, 0x3229,
-  0x3251, 0x325f,
-  0x3280, 0x3289,
-  0x32b1, 0x32bf,
-  0x3400, 0x4db5,
-  0x4e00, 0x9fa5,
-  0xa000, 0xa48c,
-  0xac00, 0xd7a3,
-  0xf900, 0xfa2d,
-  0xfa30, 0xfa6a,
-  0xfb00, 0xfb06,
-  0xfb13, 0xfb17,
-  0xfb1d, 0xfb28,
-  0xfb2a, 0xfb36,
-  0xfb38, 0xfb3c,
-  0xfb3e, 0xfb3e,
-  0xfb40, 0xfb41,
-  0xfb43, 0xfb44,
-  0xfb46, 0xfbb1,
-  0xfbd3, 0xfd3d,
-  0xfd50, 0xfd8f,
-  0xfd92, 0xfdc7,
-  0xfdf0, 0xfdfb,
-  0xfe00, 0xfe0f,
-  0xfe20, 0xfe23,
-  0xfe33, 0xfe34,
-  0xfe4d, 0xfe4f,
-  0xfe70, 0xfe74,
-  0xfe76, 0xfefc,
-  0xff10, 0xff19,
-  0xff21, 0xff3a,
-  0xff3f, 0xff3f,
-  0xff41, 0xff5a,
-  0xff65, 0xffbe,
-  0xffc2, 0xffc7,
-  0xffca, 0xffcf,
-  0xffd2, 0xffd7,
-  0xffda, 0xffdc,
-  0x10000, 0x1000b,
-  0x1000d, 0x10026,
-  0x10028, 0x1003a,
-  0x1003c, 0x1003d,
-  0x1003f, 0x1004d,
-  0x10050, 0x1005d,
-  0x10080, 0x100fa,
-  0x10107, 0x10133,
-  0x10300, 0x1031e,
-  0x10320, 0x10323,
-  0x10330, 0x1034a,
-  0x10380, 0x1039d,
-  0x10400, 0x1049d,
-  0x104a0, 0x104a9,
-  0x10800, 0x10805,
-  0x10808, 0x10808,
-  0x1080a, 0x10835,
-  0x10837, 0x10838,
-  0x1083c, 0x1083c,
-  0x1083f, 0x1083f,
-  0x1d165, 0x1d169,
-  0x1d16d, 0x1d172,
-  0x1d17b, 0x1d182,
-  0x1d185, 0x1d18b,
-  0x1d1aa, 0x1d1ad,
-  0x1d400, 0x1d454,
-  0x1d456, 0x1d49c,
-  0x1d49e, 0x1d49f,
-  0x1d4a2, 0x1d4a2,
-  0x1d4a5, 0x1d4a6,
-  0x1d4a9, 0x1d4ac,
-  0x1d4ae, 0x1d4b9,
-  0x1d4bb, 0x1d4bb,
-  0x1d4bd, 0x1d4c3,
-  0x1d4c5, 0x1d505,
-  0x1d507, 0x1d50a,
-  0x1d50d, 0x1d514,
-  0x1d516, 0x1d51c,
-  0x1d51e, 0x1d539,
-  0x1d53b, 0x1d53e,
-  0x1d540, 0x1d544,
-  0x1d546, 0x1d546,
-  0x1d54a, 0x1d550,
-  0x1d552, 0x1d6a3,
-  0x1d6a8, 0x1d6c0,
-  0x1d6c2, 0x1d6da,
-  0x1d6dc, 0x1d6fa,
-  0x1d6fc, 0x1d714,
-  0x1d716, 0x1d734,
-  0x1d736, 0x1d74e,
-  0x1d750, 0x1d76e,
-  0x1d770, 0x1d788,
-  0x1d78a, 0x1d7a8,
-  0x1d7aa, 0x1d7c2,
-  0x1d7c4, 0x1d7c9,
-  0x1d7ce, 0x1d7ff,
-  0x20000, 0x2a6d6,
-  0x2f800, 0x2fa1d,
-  0xe0100, 0xe01ef
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-}; /* end of MBWord */
 
 
 static int
-utf8_get_ctype_code_range(int ctype,
-                          const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
+                    const OnigCodePoint* ranges[])
 {
-#define CR_SET(sbl,mbl) do { \
-  *sbr = sbl; \
-  *mbr = mbl; \
-} while (0)
-
-#define CR_SB_SET(sbl) do { \
-  *sbr = sbl; \
-  *mbr = EmptyRange; \
-} while (0)
-
-  switch (ctype) {
-  case ONIGENC_CTYPE_ALPHA:
-    CR_SET(SBAlpha, MBAlpha);
-    break;
-  case ONIGENC_CTYPE_BLANK:
-    CR_SET(SBBlank, MBBlank);
-    break;
-  case ONIGENC_CTYPE_CNTRL:
-    CR_SET(SBCntrl, MBCntrl);
-    break;
-  case ONIGENC_CTYPE_DIGIT:
-    CR_SET(SBDigit, MBDigit);
-    break;
-  case ONIGENC_CTYPE_GRAPH:
-    CR_SET(SBGraph, MBGraph);
-    break;
-  case ONIGENC_CTYPE_LOWER:
-    CR_SET(SBLower, MBLower);
-    break;
-  case ONIGENC_CTYPE_PRINT:
-    CR_SET(SBPrint, MBPrint);
-    break;
-  case ONIGENC_CTYPE_PUNCT:
-    CR_SET(SBPunct, MBPunct);
-    break;
-  case ONIGENC_CTYPE_SPACE:
-    CR_SET(SBSpace, MBSpace);
-    break;
-  case ONIGENC_CTYPE_UPPER:
-    CR_SET(SBUpper, MBUpper);
-    break;
-  case ONIGENC_CTYPE_XDIGIT:
-    CR_SB_SET(SBXDigit);
-    break;
-  case ONIGENC_CTYPE_WORD:
-    CR_SET(SBWord, MBWord);
-    break;
-  case ONIGENC_CTYPE_ASCII:
-    CR_SB_SET(SBASCII);
-    break;
-  case ONIGENC_CTYPE_ALNUM:
-    CR_SET(SBAlnum, MBAlnum);
-    break;
-
-  default:
-    return ONIGENCERR_TYPE_BUG;
-    break;
-  }
-
-  return 0;
+  *sb_out = 0x80;
+  return onigenc_unicode_ctype_code_range(ctype, ranges);
 }
 
-static int
-utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  const OnigCodePoint *range;
-#endif
-
-  if (code < 256) {
-    return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
-  }
-
-#ifdef USE_UNICODE_FULL_RANGE_CTYPE
-
-  switch (ctype) {
-  case ONIGENC_CTYPE_ALPHA:
-    range = MBAlpha;
-    break;
-  case ONIGENC_CTYPE_BLANK:
-    range = MBBlank;
-    break;
-  case ONIGENC_CTYPE_CNTRL:
-    range = MBCntrl;
-    break;
-  case ONIGENC_CTYPE_DIGIT:
-    range = MBDigit;
-    break;
-  case ONIGENC_CTYPE_GRAPH:
-    range = MBGraph;
-    break;
-  case ONIGENC_CTYPE_LOWER:
-    range = MBLower;
-    break;
-  case ONIGENC_CTYPE_PRINT:
-    range = MBPrint;
-    break;
-  case ONIGENC_CTYPE_PUNCT:
-    range = MBPunct;
-    break;
-  case ONIGENC_CTYPE_SPACE:
-    range = MBSpace;
-    break;
-  case ONIGENC_CTYPE_UPPER:
-    range = MBUpper;
-    break;
-  case ONIGENC_CTYPE_XDIGIT:
-    return FALSE;
-    break;
-  case ONIGENC_CTYPE_WORD:
-    range = MBWord;
-    break;
-  case ONIGENC_CTYPE_ASCII:
-    return FALSE;
-    break;
-  case ONIGENC_CTYPE_ALNUM:
-    range = MBAlnum;
-    break;
-  case ONIGENC_CTYPE_NEWLINE:
-    return FALSE;
-    break;
-
-  default:
-    return ONIGENCERR_TYPE_BUG;
-    break;
-  }
-
-  return onig_is_in_code_range((UChar* )range, code);
-
-#else
-
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-#ifdef USE_INVALID_CODE_SCHEME
-    if (code <= VALID_CODE_LIMIT)
-#endif
-      return TRUE;
-  }
-#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
-
-  return FALSE;
-}
 
 static UChar*
-utf8_left_adjust_char_head(const UChar* start, const UChar* s)
+left_adjust_char_head(const UChar* start, const UChar* s)
 {
   const UChar *p;
 
@@ -3700,31 +277,29 @@ utf8_left_adjust_char_head(const UChar* start, const UChar* s)
   return (UChar* )p;
 }
 
+static int
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  return onigenc_unicode_get_case_fold_codes_by_str(ONIG_ENCODING_UTF8,
+                                                   flag, p, end, items);
+}
+
 OnigEncodingType OnigEncodingUTF8 = {
-  utf8_mbc_enc_len,
+  mbc_enc_len,
   "UTF-8",     /* name */
   6,           /* max byte length */
   1,           /* min byte length */
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | 
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
-  {
-      (OnigCodePoint )'\\'                       /* esc */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
-    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
-  },
-  utf8_is_mbc_newline,
-  utf8_mbc_to_code,
-  utf8_code_to_mbclen,
-  utf8_code_to_mbc,
-  utf8_mbc_to_normalize,
-  utf8_is_mbc_ambiguous,
-  onigenc_iso_8859_1_get_all_pair_ambig_codes,
-  onigenc_ess_tsett_get_all_comp_ambig_codes,
-  utf8_is_code_ctype,
-  utf8_get_ctype_code_range,
-  utf8_left_adjust_char_head,
+  is_mbc_newline,
+  mbc_to_code,
+  code_to_mbclen,
+  code_to_mbc,
+  mbc_case_fold,
+  onigenc_unicode_apply_all_case_fold,
+  get_case_fold_codes_by_str,
+  onigenc_unicode_property_name_to_ctype,
+  onigenc_unicode_is_code_ctype,
+  get_ctype_code_range,
+  left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
 };
index d55f1cc94f7242a50f04cd95bcf69a80e4008a3d..af3426ce32892bc5aa29fb8623b90071762cf036 100755 (executable)
@@ -8,7 +8,7 @@
 <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
 
 <p>
-(c) K.Kosako, updated at: 2007/08/16
+(c) K.Kosako, updated at: 2010/01/09
 </p>
 
 <dl>
@@ -16,8 +16,8 @@
 <dt><b>What's new</b>
 </font>
 <ul>
+<li>2010/01/09: Version 5.9.2 released.</li>
 <li>2007/08/16: Version 4.7.1 released.</li>
-<li>2007/07/14: Version 5.9.0 released.</li>
 <li>2007/06/20: Version 2.5.9 released.</li>
 <li>2007/06/20: Maintainer of 2.x was changed.</li>
 </ul>
@@ -62,9 +62,8 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
 
 <dt><b>Download:</b>
 <ul>
-<li> <a href="archive/onig-5.9.0.tar.gz">Latest release version 5.9.0</a> (2007/07/14)   <a href="HISTORY_5X.txt">Change Log</a>
-<li> <a href="archive/onig-5.8.0.tar.gz">5.8.0</a> (2007/06/04)
-<li> <a href="archive/onig-5.7.0.tar.gz">5.7.0</a> (2007/04/27)
+<li> <a href="archive/onig-5.9.2.tar.gz">Latest release version 5.9.2</a> (2010/01/09)   <a href="HISTORY_5X.txt">Change Log</a>
+<li> <a href="archive/onig-5.9.1.tar.gz">5.9.1</a> (2007/12/22)
 <li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16)   <a href="HISTORY_4X.txt">Change Log</a>
 <li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
 <li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20)   <a href="HISTORY_2X.txt">Change Log</a>
@@ -79,7 +78,7 @@ About 2.x, please contact him.<br>
 * 2.x supports Ruby1.6/1.8.<br>
 
 <br>
-<dt><b>Documents:</b> (version 5.9.0)
+<dt><b>Documents:</b> (version 5.9.2)
 <ul>
  <li> <a href="doc/RE.txt">Regular Expressions</a>
       <a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
@@ -133,6 +132,7 @@ About 2.x, please contact him.<br>
 <li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
 <li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
 <li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page)
+<li> <a href="http://search.cpan.org/~andya/re-engine-Oniguruma">re-engine-Oniguruma</a>
 <li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
 <li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
 <li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
@@ -142,6 +142,7 @@ About 2.x, please contact him.<br>
 <li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
 <li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
 <li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page)
+<li> <a href="http://www.hi-ho.ne.jp/kuze/tool.htm">Zed (Win32)</a> (Japanese page)
 </ul>
 
 <br>
@@ -174,14 +175,13 @@ and I'm thankful to Akinori MUSHA.
 <li> <a href="http://re2c.org/">re2c</a>
 <li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
 <li> <a href="http://laurikari.net/tre/">TRE</a>
+<li> <a href="http://svn.codehaus.org/jruby/joni/">Joni (Java)</a>
 <li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
 <li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
 <li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
 <li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
+<li> <a href="http://sourceforge.jp/projects/onig4j/">Oniguruma for Java</a>
 </ul>
 </dl>
-
-<hr>
-<a href="../">Back to Home</a>
 </body>
 </html>
diff --git a/ext/mbstring/oniguruma/index_ja.html b/ext/mbstring/oniguruma/index_ja.html
new file mode 100644 (file)
index 0000000..e03b045
--- /dev/null
@@ -0,0 +1,190 @@
+<html>
+<head>
+  <meta HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=utf-8">
+  <title>鬼車</title>
+</head>
+<body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
+
+<h1>鬼車</h1>
+
+<p>
+(c) K.Kosako, 最終更新: 2010/01/09
+</p>
+
+<dl>
+<font color="orange">
+<dt><b>更新情報</b>
+</font>
+<ul>
+<li>2010/01/09: Version 5.9.2 リリース</li>
+<li>2007/08/16: Version 4.7.1 リリース</li>
+<li>2007/06/20: Version 2.5.9 リリース</li>
+<li>2007/06/20: 2.xの保守担当者を変更</li>
+</ul>
+</dl>
+<hr>
+
+<p>
+鬼車は正規表現ライブラリである。<br>
+このライブラリの特徴は、それぞれの正規表現オブジェクトごとに異なる文字エンコーディングを
+指定できること。<br>
+(API: GNU regex, POSIX and Oniguruma native)
+</p>
+
+<dl>
+<dt><b>対応している文字エンコーディング:</b><br>
+ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
+EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
+Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
+ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
+ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
+<font color="orange">
+(GB18030は、KUBO Takehiro氏提供)<br>
+(CP1251は、Byte氏提供)
+</font>
+</p>
+</dl>
+
+<hr>
+
+<dt><b>ライセンス:</b>BSDライセンス
+
+<dl>
+<dt><b>プラットフォーム:</b>
+<ul>
+<li> Unix (Mac OS Xを含む)
+<li> Cygwin
+<li> Win32
+</ul>
+
+<br>
+
+<dt><b>ダウンロード:</b>
+<ul>
+<li> <a href="archive/onig-5.9.2.tar.gz">5.9.2 最新版</a> (2010/01/09)   <a href="HISTORY_5X.txt">更新履歴</a>
+<li> <a href="archive/onig-5.9.1.tar.gz">5.9.1</a> (2007/12/22)
+<li> <a href="archive/onig-4.7.1.tar.gz">4.7.1 最新版</a> (2007/08/16)   <a href="HISTORY_4X.txt">更新履歴</a>
+<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
+<li> <a href="archive/onigd2_5_9.tar.gz">2.5.9 最新版</a> (2007/06/20)   <a href="HISTORY_2X.txt">更新履歴</a>
+</ul>
+
+<br>
+<font color="red">
+2.xの保守担当は、Hannes Wyss &lt;hwyss AT ywesee.com&gt;に交替しました。<br>
+2.xについては、彼に連絡してください。<br>
+</font>
+* 5.xはUnicode Property/Scriptを提供<br>
+* 2.xはRuby1.6/1.8組込みライブラリとして動作する。 (2006年末で保守を終了)<br>
+
+<br>
+<dt><b>ドキュメント:</b> (version 5.9.2)
+<ul>
+ <li> <a href="doc/RE.txt">正規表現</a>
+      <a href="doc/RE.ja.txt">(日本語: EUC-JP)</a>
+ <li> <a href="doc/API.txt">鬼車API</a>
+      <a href="doc/API.ja.txt">(日本語: EUC-JP)</a>
+</ul>
+
+<br>
+<dt><b>サンプルプログラム:</b>
+<ul>
+ <li><a href="sample/simple.c">最小使用例</a>
+ <li><a href="sample/sql.c">可変文法と可変メタ文字機能使用例(SQL-like pattern match)</a>
+</ul>
+
+<br>
+<dt><b>サイト:</b>
+<ul>
+<li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
+<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna &gt; Lib &gt;  Oniguruma</a> (日本語)
+</ul>
+
+<br>
+<dt><b>リンク:</b>
+<ul>
+<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (日本語)
+<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (日本語)
+<li> <a href="http://kmaebashi.com/">crowbar</a> (日本語)
+<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
+<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (日本語)
+<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (日本語)
+<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (日本語)
+<li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
+<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
+<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (日本語)
+<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
+<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (日本語)
+<li> <a href="http://limechat.net/">LimeChat</a> (日本語)
+<li> <a href="http://medb.enhiro.com/">meDB</a> (日本語)
+<li> <a href="http://monaos.org/">Mona OS</a>
+<li> <a href="http://mongoose.jp/">mongoose</a> (日本語)
+<li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (日本語)
+<li> <a href="http://ochusha.sourceforge.jp/">おちゅ〜しゃ</a> (日本語)
+<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (日本語)
+<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (日本語)
+<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
+<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
+<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (日本語)
+<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (日本語)
+<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
+<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (日本語)
+<li> <a href="http://www.php.gr.jp/">日本PHPユーザ会</a> PHP 5.0 mb_ereg (日本語)
+<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (日本語)
+<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (日本語)
+<li> <a href="http://search.cpan.org/~andya/re-engine-Oniguruma">re-engine-Oniguruma</a>
+<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (日本語)
+<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (日本語)
+<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (日本語)
+<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (日本語)
+<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
+<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
+<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
+<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
+<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (日本語)
+<li> <a href="http://www.hi-ho.ne.jp/kuze/tool.htm">Zed (Win32)</a> (日本語)
+</ul>
+
+<br>
+<dt><b>参考資料:</b>
+<ul>
+<li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Rubyリファレンスマニュアル</a> (日本語)
+<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
+<li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
+<li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
+<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
+<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
+<li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">正規表現メモ</a> (日本語)
+<li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Perl正規表現雑技</a> (日本語)
+</ul>
+
+<br>
+</dl>
+<p>
+and I'm thankful to Akinori MUSHA.
+</p>
+
+<hr>
+<dl>
+<dt><b>他のライブラリ:</b>
+<ul>
+<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
+<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's</a>
+<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
+<li> <a href="http://www.pcre.org/">PCRE</a>
+<li> <a href="http://re2c.org/">re2c</a>
+<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
+<li> <a href="http://laurikari.net/tre/">TRE</a>
+<li> <a href="http://svn.codehaus.org/jruby/joni/">Joni (Java)</a>
+<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
+<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
+<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
+<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
+<li> <a href="http://sourceforge.jp/projects/onig4j/">Oniguruma for Java</a>
+</ul>
+</dl>
+
+<hr>
+<a href="../">ホームにもどる</a>
+</body>
+</html>
index cfeb88a2928e7a07ae5ebdc7842f57cb40f5c27d..f1cb35fbd878373c6e5905577cff92297660a525 100644 (file)
@@ -97,7 +97,7 @@ typedef struct {
 
 #ifndef ONIG_EXTERN
 #if defined(_WIN32) && !defined(__GNUC__)
-#if defined(EXPORT) || defined(RUBY_EXPORT)
+#if defined(EXPORT)
 #define ONIG_EXTERN   extern __declspec(dllexport)
 #else
 #define ONIG_EXTERN   extern __declspec(dllimport)
index 5196a3d585d8aa39fb3860eff2671fc53f72ea2c..3b557639f1daea2d1f440d171a1705ff9c442711 100644 (file)
@@ -4,7 +4,7 @@
   oniguruma.h - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2009  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * SUCH DAMAGE.
  */
 
-#include "../php_onig_compat.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 #define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR   4
-#define ONIGURUMA_VERSION_MINOR   7
-#define ONIGURUMA_VERSION_TEENY   1
+#define ONIGURUMA_VERSION_MAJOR   5
+#define ONIGURUMA_VERSION_MINOR   9
+#define ONIGURUMA_VERSION_TEENY   2
 
 #ifdef __cplusplus
 # ifndef  HAVE_PROTOTYPES
@@ -56,6 +54,12 @@ extern "C" {
 # endif
 #endif
 
+#ifdef HAVE_STDARG_H
+# ifndef  HAVE_STDARG_PROTOTYPES
+#  define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
 #ifndef P_
 #if defined(__STDC__) || defined(_WIN32)
 # define P_(args) args
@@ -94,28 +98,27 @@ extern "C" {
 
 typedef unsigned char  OnigUChar;
 typedef unsigned long  OnigCodePoint;
+typedef unsigned int   OnigCtype;
 typedef unsigned int   OnigDistance;
 
 #define ONIG_INFINITE_DISTANCE  ~((OnigDistance )0)
 
-/* ambiguous match flag */
-typedef unsigned int OnigAmbigType;
+typedef unsigned int OnigCaseFoldType; /* case fold flag */
 
-ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
+ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
 
-#define ONIGENC_AMBIGUOUS_MATCH_NONE                   0
-#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE            (1<<0)
-#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE         (1<<1)
+/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA  (1<<1) */
+/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH     (1<<2) */
+#define ONIGENC_CASE_FOLD_TURKISH_AZERI         (1<<20)
+#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR   (1<<30)
 
-#define ONIGENC_AMBIGUOUS_MATCH_LIMIT                 (1<<1)
+#define ONIGENC_CASE_FOLD_MIN      INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
+#define ONIGENC_CASE_FOLD_DEFAULT  OnigDefaultCaseFoldFlag
 
-#define ONIGENC_AMBIGUOUS_MATCH_FULL \
- ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE )
-#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT  OnigDefaultAmbigFlag
 
-
-#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN       3
-#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM  4
+#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN       3
+#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM      13
+/* 13 => Unicode:0x1ffc */
 
 /* code range */
 #define ONIGENC_CODE_RANGE_NUM(range)     ((int )range[0])
@@ -123,20 +126,10 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
 #define ONIGENC_CODE_RANGE_TO(range,i)    range[((i)*2) + 2]
 
 typedef struct {
-  int len;
-  OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
-} OnigCompAmbigCodeItem;
-
-typedef struct {
-  int n;
-  OnigCodePoint code;
-  OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM];
-} OnigCompAmbigCodes;
-
-typedef struct {
-  OnigCodePoint from;
-  OnigCodePoint to;
-} OnigPairAmbigCodes;
+  int byte_len;  /* argument(original) character(s) byte length */
+  int code_len;  /* number of code */
+  OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
+} OnigCaseFoldCodeItem;
 
 typedef struct {
   OnigCodePoint esc;
@@ -146,32 +139,24 @@ typedef struct {
   OnigCodePoint one_or_more_time;
   OnigCodePoint anychar_anytime;
 } OnigMetaCharTableType;
+  
+typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
 
-
-#if defined(RUBY_PLATFORM) && defined(M17N_H)
-
-#define ONIG_RUBY_M17N
-typedef m17n_encoding*        OnigEncoding;
-
-#else
-
-typedef struct {
+typedef struct OnigEncodingTypeST {
   int    (*mbc_enc_len)(const OnigUChar* p);
   const char*   name;
   int           max_enc_len;
   int           min_enc_len;
-  OnigAmbigType support_ambig_flag;
-  OnigMetaCharTableType meta_char_table;
   int    (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end);
   OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end);
   int    (*code_to_mbclen)(OnigCodePoint code);
   int    (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
-  int    (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
-  int    (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
-  int    (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
-  int    (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
-  int    (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
-  int    (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
+  int    (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
+  int    (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
+  int    (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[]);
+  int    (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
+  int    (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype);
+  int    (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]);
   OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
   int    (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
 } OnigEncodingType;
@@ -206,6 +191,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingEUC_CN;
 ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
 ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
 ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
+ONIG_EXTERN OnigEncodingType OnigEncodingCP1251;
 ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
 ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
 
@@ -237,136 +223,60 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
 #define ONIG_ENCODING_SJIS         (&OnigEncodingSJIS)
 #define ONIG_ENCODING_KOI8         (&OnigEncodingKOI8)
 #define ONIG_ENCODING_KOI8_R       (&OnigEncodingKOI8_R)
+#define ONIG_ENCODING_CP1251       (&OnigEncodingCP1251)
 #define ONIG_ENCODING_BIG5         (&OnigEncodingBIG5)
 #define ONIG_ENCODING_GB18030      (&OnigEncodingGB18030)
 
-#endif /* else RUBY && M17N */
-
 #define ONIG_ENCODING_UNDEF    ((OnigEncoding )0)
 
 
 /* work size */
-#define ONIGENC_CODE_TO_MBC_MAXLEN      7
-#define ONIGENC_MBC_NORMALIZE_MAXLEN    ONIGENC_CODE_TO_MBC_MAXLEN
+#define ONIGENC_CODE_TO_MBC_MAXLEN       7
+#define ONIGENC_MBC_CASE_FOLD_MAXLEN    18
+/* 18: 6(max-byte) * 3(case-fold chars) */
 
 /* character types */
-#define ONIGENC_CTYPE_NEWLINE  (1<< 0)
-#define ONIGENC_CTYPE_ALPHA    (1<< 1)
-#define ONIGENC_CTYPE_BLANK    (1<< 2)
-#define ONIGENC_CTYPE_CNTRL    (1<< 3)
-#define ONIGENC_CTYPE_DIGIT    (1<< 4)
-#define ONIGENC_CTYPE_GRAPH    (1<< 5)
-#define ONIGENC_CTYPE_LOWER    (1<< 6)
-#define ONIGENC_CTYPE_PRINT    (1<< 7)
-#define ONIGENC_CTYPE_PUNCT    (1<< 8)
-#define ONIGENC_CTYPE_SPACE    (1<< 9)
-#define ONIGENC_CTYPE_UPPER    (1<<10)
-#define ONIGENC_CTYPE_XDIGIT   (1<<11)
-#define ONIGENC_CTYPE_WORD     (1<<12)
-#define ONIGENC_CTYPE_ASCII    (1<<13)
-#define ONIGENC_CTYPE_ALNUM    (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
-
-#define enc_len(enc,p)                ONIGENC_MBC_ENC_LEN(enc, p)
+#define ONIGENC_CTYPE_NEWLINE   0
+#define ONIGENC_CTYPE_ALPHA     1
+#define ONIGENC_CTYPE_BLANK     2
+#define ONIGENC_CTYPE_CNTRL     3
+#define ONIGENC_CTYPE_DIGIT     4
+#define ONIGENC_CTYPE_GRAPH     5
+#define ONIGENC_CTYPE_LOWER     6
+#define ONIGENC_CTYPE_PRINT     7
+#define ONIGENC_CTYPE_PUNCT     8
+#define ONIGENC_CTYPE_SPACE     9
+#define ONIGENC_CTYPE_UPPER    10
+#define ONIGENC_CTYPE_XDIGIT   11
+#define ONIGENC_CTYPE_WORD     12
+#define ONIGENC_CTYPE_ALNUM    13  /* alpha || digit */
+#define ONIGENC_CTYPE_ASCII    14
+#define ONIGENC_MAX_STD_CTYPE  ONIGENC_CTYPE_ASCII
+
+
+#define onig_enc_len(enc,p,end)        ONIGENC_MBC_ENC_LEN(enc,p)
 
 #define ONIGENC_IS_UNDEF(enc)          ((enc) == ONIG_ENCODING_UNDEF)
 #define ONIGENC_IS_SINGLEBYTE(enc)     (ONIGENC_MBC_MAXLEN(enc) == 1)
 #define ONIGENC_IS_MBC_HEAD(enc,p)     (ONIGENC_MBC_ENC_LEN(enc,p) != 1)
 #define ONIGENC_IS_MBC_ASCII(p)           (*(p)   < 128)
 #define ONIGENC_IS_CODE_ASCII(code)       ((code) < 128)
-#define ONIGENC_IS_CODE_SB_WORD(enc,code) \
-  (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
 #define ONIGENC_IS_MBC_WORD(enc,s,end) \
    ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
 
 
-#ifdef ONIG_RUBY_M17N
-
-#include <ctype.h> /* for isblank(), isgraph() */
-
-#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
-        onigenc_mbc_to_normalize(enc,flag,pp,end,buf)
-#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
-        onigenc_is_mbc_ambiguous(enc,flag,pp,end)
-
-#define ONIGENC_SUPPORT_AMBIG_FLAG(enc)     ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE
-#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
-        onigenc_is_allowed_reverse_match(enc, s, end)
-#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
-        onigenc_get_left_adjust_char_head(enc, start, s)
-#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs)    0
-#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs)    0
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
-        ONIG_NO_SUPPORT_CONFIG
-#define ONIGENC_MBC_ENC_LEN(enc,p)            m17n_mbclen(enc,(int )(*p))
-#define ONIGENC_MBC_MAXLEN(enc)               m17n_mbmaxlen(enc)
-#define ONIGENC_MBC_MAXLEN_DIST(enc) \
-    (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \
-                                 : ONIG_INFINITE_DISTANCE)
-#define ONIGENC_MBC_MINLEN(enc)            1
-#define ONIGENC_MBC_TO_CODE(enc,p,e)       m17n_codepoint((enc),(p),(e))
-#define ONIGENC_CODE_TO_MBCLEN(enc,code)   m17n_codelen((enc),(code))
-#define ONIGENC_CODE_TO_MBC(enc,code,buf)  onigenc_code_to_mbc(enc, code, buf)
-
-#if 0     /* !! not supported !! */
-#define ONIGENC_IS_MBC_NEWLINE(enc,p,end)
-#define ONIGENC_STEP_BACK(enc,start,s,n)
-#endif
-
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \
-        onigenc_is_code_ctype(enc,code,ctype)
-
-#ifdef isblank
-# define ONIGENC_IS_CODE_BLANK(enc,code) isblank((int )code)
-#else
-# define ONIGENC_IS_CODE_BLANK(enc,code) ((code) == ' ' || (code) == '\t')
-#endif
-#ifdef isgraph
-# define ONIGENC_IS_CODE_GRAPH(enc,code) isgraph((int )code)
-#else
-# define ONIGENC_IS_CODE_GRAPH(enc,code) \
-  (isprint((int )code) && !isspace((int )code))
-#endif
-
-#define ONIGENC_IS_CODE_PRINT(enc,code)     m17n_isprint(enc,code)
-#define ONIGENC_IS_CODE_ALNUM(enc,code)     m17n_isalnum(enc,code)
-#define ONIGENC_IS_CODE_ALPHA(enc,code)     m17n_isalpha(enc,code)
-#define ONIGENC_IS_CODE_LOWER(enc,code)     m17n_islower(enc,code)
-#define ONIGENC_IS_CODE_UPPER(enc,code)     m17n_isupper(enc,code)
-#define ONIGENC_IS_CODE_CNTRL(enc,code)     m17n_iscntrl(enc,code)
-#define ONIGENC_IS_CODE_PUNCT(enc,code)     m17n_ispunct(enc,code)
-#define ONIGENC_IS_CODE_SPACE(enc,code)     m17n_isspace(enc,code)
-#define ONIGENC_IS_CODE_DIGIT(enc,code)     m17n_isdigit(enc,code)
-#define ONIGENC_IS_CODE_XDIGIT(enc,code)    m17n_isxdigit(enc,code)
-#define ONIGENC_IS_CODE_WORD(enc,code)      m17n_iswchar(enc,code)
-
-ONIG_EXTERN
-int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype));
-ONIG_EXTERN
-int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, OnigUChar *buf));
-ONIG_EXTERN
-int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* buf));
-ONIG_EXTERN
-int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end));
-ONIG_EXTERN
-int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
-
-#else  /* ONIG_RUBY_M17N */
-
 #define ONIGENC_NAME(enc)                      ((enc)->name)
 
-#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \
-  (enc)->mbc_to_normalize(flag,(const OnigUChar** )pp,end,buf)
-#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \
-  (enc)->is_mbc_ambiguous(flag,(const OnigUChar** )pp,end)
-#define ONIGENC_SUPPORT_AMBIG_FLAG(enc)        ((enc)->support_ambig_flag)
+#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
+  (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf)
 #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
         (enc)->is_allowed_reverse_match(s,end)
 #define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \
         (enc)->left_adjust_char_head(start, s)
-#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \
-        (enc)->get_all_pair_ambig_codes(ambig_flag,acs)
-#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \
-        (enc)->get_all_comp_ambig_codes(ambig_flag,acs)
+#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
+        (enc)->apply_all_case_fold(case_fold_flag,f,arg)
+#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
+       (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs)
 #define ONIGENC_STEP_BACK(enc,start,s,n) \
         onigenc_step_back((enc),(start),(s),(n))
 
@@ -378,6 +288,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, c
 #define ONIGENC_MBC_TO_CODE(enc,p,end)         (enc)->mbc_to_code((p),(end))
 #define ONIGENC_CODE_TO_MBCLEN(enc,code)       (enc)->code_to_mbclen(code)
 #define ONIGENC_CODE_TO_MBC(enc,code,buf)      (enc)->code_to_mbc(code,buf)
+#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
+  (enc)->property_name_to_ctype(enc,p,end)
 
 #define ONIGENC_IS_CODE_CTYPE(enc,code,ctype)  (enc)->is_code_ctype(code,ctype)
 
@@ -410,14 +322,12 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, const OnigUChar* s, c
 #define ONIGENC_IS_CODE_WORD(enc,code) \
         ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
 
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
-        (enc)->get_ctype_code_range(ctype,sbr,mbr)
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
+        (enc)->get_ctype_code_range(ctype,sbout,ranges)
 
 ONIG_EXTERN
 OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, int n));
 
-#endif /* is not ONIG_RUBY_M17N */
-
 
 /* encoding API */
 ONIG_EXTERN
@@ -482,10 +392,11 @@ typedef unsigned int        OnigOptionType;
 
 /* syntax */
 typedef struct {
-  unsigned int  op;
-  unsigned int  op2;
-  unsigned int  behavior;
-  OnigOptionType options;    /* default option */
+  unsigned int   op;
+  unsigned int   op2;
+  unsigned int   behavior;
+  OnigOptionType options;   /* default option */
+  OnigMetaCharTableType meta_char_table;
 } OnigSyntaxType;
 
 ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
@@ -566,7 +477,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
 #define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1U<<15) /* \`, \' */
 #define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  (1U<<16) /* \p{...}, \P{...} */
 #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
-#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    (1U<<18) /* \p{IsXDigit} */
+/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
 #define ONIG_SYN_OP2_ESC_H_XDIGIT               (1U<<19) /* \h, \H */
 #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE         (1U<<20) /* \ */
 
@@ -666,6 +577,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
 #define ONIGERR_NEVER_ENDING_RECURSION                       -221
 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY        -222
 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME                   -223
+#define ONIGERR_INVALID_CODE_POINT_VALUE                     -400
 #define ONIGERR_INVALID_WIDE_CHAR_VALUE                      -400
 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                      -401
 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION           -402
@@ -755,10 +667,10 @@ typedef struct re_pattern_buffer {
   int repeat_range_alloc;
   OnigRepeatRange* repeat_range;
 
-  OnigEncoding  enc;
+  OnigEncoding      enc;
   OnigOptionType    options;
   OnigSyntaxType*   syntax;
-  OnigAmbigType     ambig_flag;
+  OnigCaseFoldType  case_fold_flag;
   void*             name_table;
 
   /* optimization info (string search, char-map and anchors) */
@@ -793,7 +705,7 @@ typedef struct {
   OnigEncoding    target_enc;
   OnigSyntaxType* syntax;
   OnigOptionType  option;
-  OnigAmbigType   ambig_flag;
+  OnigCaseFoldType   case_fold_flag;
 } OnigCompileInfo;
 
 /* Oniguruma Native API */
@@ -808,10 +720,15 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f));
 ONIG_EXTERN
 int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 ONIG_EXTERN
+int  onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax));
+int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+ONIG_EXTERN
 int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
 ONIG_EXTERN
 void onig_free P_((OnigRegex));
 ONIG_EXTERN
+void onig_free_body P_((OnigRegex));
+ONIG_EXTERN
 int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 ONIG_EXTERN
 int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
@@ -856,7 +773,7 @@ OnigEncoding onig_get_encoding P_((OnigRegex reg));
 ONIG_EXTERN
 OnigOptionType onig_get_options P_((OnigRegex reg));
 ONIG_EXTERN
-OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg));
+OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
 ONIG_EXTERN
 OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
 ONIG_EXTERN
@@ -880,13 +797,13 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior)
 ONIG_EXTERN
 void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
 ONIG_EXTERN
-int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code));
+int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
 ONIG_EXTERN
 void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
 ONIG_EXTERN
-OnigAmbigType onig_get_default_ambig_flag P_((void));
+OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
 ONIG_EXTERN
-int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
+int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
 ONIG_EXTERN
 unsigned int onig_get_match_stack_limit_size P_((void));
 ONIG_EXTERN
index 6a0976dee225aa66f8d4c954ec8b80d2313e18d4..f9d99563b3897601669c6608fca22541a14cd3be 100644 (file)
@@ -2,7 +2,7 @@
   regcomp.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 #include "regparse.h"
 
-OnigAmbigType OnigDefaultAmbigFlag =
-  (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
+OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
 
-extern OnigAmbigType
-onig_get_default_ambig_flag(void)
+extern OnigCaseFoldType
+onig_get_default_case_fold_flag(void)
 {
-  return OnigDefaultAmbigFlag;
+  return OnigDefaultCaseFoldFlag;
 }
 
 extern int
-onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
+onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
 {
-  OnigDefaultAmbigFlag = ambig_flag;
+  OnigDefaultCaseFoldFlag = case_fold_flag;
   return 0;
 }
 
 
+#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+#endif
+
 static UChar*
-k_strdup(UChar* s, UChar* end)
+str_dup(UChar* s, UChar* end)
 {
   int len = end - s;
 
@@ -62,15 +64,29 @@ k_strdup(UChar* s, UChar* end)
   else return NULL;
 }
 
-/*
-  Caution: node should not be a string node.
-           (s and end member address break)
-*/
 static void
 swap_node(Node* a, Node* b)
 {
   Node c;
   c = *a; *a = *b; *b = c;
+
+  if (NTYPE(a) == NT_STR) {
+    StrNode* sn = NSTR(a);
+    if (sn->capa == 0) {
+      int len = sn->end - sn->s;
+      sn->s   = sn->buf;
+      sn->end = sn->s + len;
+    }
+  }
+
+  if (NTYPE(b) == NT_STR) {
+    StrNode* sn = NSTR(b);
+    if (sn->capa == 0) {
+      int len = sn->end - sn->s;
+      sn->s   = sn->buf;
+      sn->end = sn->s + len;
+    }
+  }
 }
 
 static OnigDistance
@@ -99,7 +115,7 @@ static int
 bitset_is_empty(BitSetRef bs)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
+  for (i = 0; i < (int )BITSET_SIZE; i++) {
     if (bs[i] != 0) return 0;
   }
   return 1;
@@ -122,8 +138,14 @@ bitset_on_num(BitSetRef bs)
 extern int
 onig_bbuf_init(BBuf* buf, int size)
 {
-  buf->p = (UChar* )xmalloc(size);
-  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
+  if (size <= 0) {
+    size   = 0;
+    buf->p = NULL;
+  }
+  else {
+    buf->p = (UChar* )xmalloc(size);
+    if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
+  }
 
   buf->alloc = size;
   buf->used  = 0;
@@ -139,7 +161,7 @@ unset_addr_list_init(UnsetAddrList* uslist, int size)
   UnsetAddr* p;
 
   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
-  CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+  CHECK_NULL_RETURN_MEMERR(p);
   uslist->num   = 0;
   uslist->alloc = size;
   uslist->us    = p;
@@ -162,7 +184,7 @@ unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
   if (uslist->num >= uslist->alloc) {
     size = uslist->alloc * 2;
     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
-    CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(p);
     uslist->alloc = size;
     uslist->us    = p;
   }
@@ -394,8 +416,8 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
 }
 
 static int
-add_compile_string_length(UChar* s, int mb_len, int str_len,
-                          regex_t* reg, int ignore_case)
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
+                          regex_t* reg ARG_UNUSED, int ignore_case)
 {
   int len;
   int op = select_str_opcode(mb_len, str_len, ignore_case);
@@ -440,20 +462,20 @@ compile_length_string_node(Node* node, regex_t* reg)
   UChar *p, *prev;
   StrNode* sn;
 
-  sn = &(NSTRING(node));
+  sn = NSTR(node);
   if (sn->end <= sn->s)
     return 0;
 
   ambig = NSTRING_IS_AMBIG(node);
 
   p = prev = sn->s;
-  prev_len = enc_len(enc, p);
+  prev_len = enclen(enc, p);
   p += prev_len;
   slen = 1;
   rlen = 0;
 
   for (; p < sn->end; ) {
-    len = enc_len(enc, p);
+    len = enclen(enc, p);
     if (len == prev_len) {
       slen++;
     }
@@ -488,7 +510,7 @@ compile_string_node(Node* node, regex_t* reg)
   UChar *p, *prev, *end;
   StrNode* sn;
 
-  sn = &(NSTRING(node));
+  sn = NSTR(node);
   if (sn->end <= sn->s)
     return 0;
 
@@ -496,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg)
   ambig = NSTRING_IS_AMBIG(node);
 
   p = prev = sn->s;
-  prev_len = enc_len(enc, p);
+  prev_len = enclen(enc, p);
   p += prev_len;
   slen = 1;
 
   for (; p < end; ) {
-    len = enc_len(enc, p);
+    len = enclen(enc, p);
     if (len == prev_len) {
       slen++;
     }
@@ -535,8 +557,6 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
   add_length(reg, mbuf->used);
   return add_bytes(reg, mbuf->p, mbuf->used);
 #else
-  static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
-
   int r, pad_size;
   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
 
@@ -558,7 +578,7 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
 {
   int len;
 
-  if (IS_CCLASS_SHARE(cc)) {
+  if (IS_NCCLASS_SHARE(cc)) {
     len = SIZE_OPCODE + SIZE_POINTER;
     return len;
   }
@@ -588,14 +608,14 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
 {
   int r;
 
-  if (IS_CCLASS_SHARE(cc)) {
+  if (IS_NCCLASS_SHARE(cc)) {
     add_opcode(reg, OP_CCLASS_NODE);
     r = add_pointer(reg, cc);
     return r;
   }
 
   if (IS_NULL(cc->mbuf)) {
-    if (IS_CCLASS_NOT(cc))
+    if (IS_NCCLASS_NOT(cc))
       add_opcode(reg, OP_CCLASS_NOT);
     else
       add_opcode(reg, OP_CCLASS);
@@ -604,7 +624,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
   }
   else {
     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
-      if (IS_CCLASS_NOT(cc))
+      if (IS_NCCLASS_NOT(cc))
         add_opcode(reg, OP_CCLASS_MB_NOT);
       else
         add_opcode(reg, OP_CCLASS_MB);
@@ -612,7 +632,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
       r = add_multi_byte_cclass(cc->mbuf, reg);
     }
     else {
-      if (IS_CCLASS_NOT(cc))
+      if (IS_NCCLASS_NOT(cc))
         add_opcode(reg, OP_CCLASS_MIX_NOT);
       else
         add_opcode(reg, OP_CCLASS_MIX);
@@ -635,7 +655,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
 
   if (reg->repeat_range_alloc == 0) {
     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
-    CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(p);
     reg->repeat_range = p;
     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
   }
@@ -644,7 +664,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
                                     sizeof(OnigRepeatRange) * n);
-    CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(p);
     reg->repeat_range = p;
     reg->repeat_range_alloc = n;
   }
@@ -658,7 +678,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
 }
 
 static int
-compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
                           regex_t* reg)
 {
   int r;
@@ -694,10 +714,10 @@ compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
 }
 
 static int
-is_anychar_star_quantifier(QuantifierNode* qn)
+is_anychar_star_quantifier(QtfrNode* qn)
 {
   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
-      NTYPE(qn->target) == N_ANYCHAR)
+      NTYPE(qn->target) == NT_CANY)
     return 1;
   else
     return 0;
@@ -709,7 +729,7 @@ is_anychar_star_quantifier(QuantifierNode* qn)
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
 
 static int
-compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
+compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
 {
   int len, mod_tlen, cklen;
   int ckn;
@@ -724,7 +744,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
 
   /* anychar repeat */
-  if (NTYPE(qn->target) == N_ANYCHAR) {
+  if (NTYPE(qn->target) == NT_CANY) {
     if (qn->greedy && infinite) {
       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
@@ -789,7 +809,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
 }
 
 static int
-compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
+compile_quantifier_node(QtfrNode* qn, regex_t* reg)
 {
   int r, mod_tlen;
   int ckn;
@@ -815,7 +835,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
        if (r) return r;
       }
 
-      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
     }
     else {
       if (IS_MULTILINE(reg->options)) {
@@ -943,7 +963,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
 #else /* USE_COMBINATION_EXPLOSION_CHECK */
 
 static int
-compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
+compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
 {
   int len, mod_tlen;
   int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -953,7 +973,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
   if (tlen < 0) return tlen;
 
   /* anychar repeat */
-  if (NTYPE(qn->target) == N_ANYCHAR) {
+  if (NTYPE(qn->target) == NT_CANY) {
     if (qn->greedy && infinite) {
       if (IS_NOT_NULL(qn->next_head_exact))
         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
@@ -1008,7 +1028,7 @@ compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
 }
 
 static int
-compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
+compile_quantifier_node(QtfrNode* qn, regex_t* reg)
 {
   int i, r, mod_tlen;
   int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -1026,7 +1046,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
       else
        r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
       if (r) return r;
-      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+      return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
     }
     else {
       if (IS_MULTILINE(reg->options))
@@ -1067,7 +1087,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
        r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
                             mod_tlen + SIZE_OP_JUMP);
        if (r) return r;
-       add_bytes(reg, NSTRING(qn->head_exact).s, 1);
+       add_bytes(reg, NSTR(qn->head_exact)->s, 1);
        r = compile_tree_empty_check(qn->target, reg, empty_info);
        if (r) return r;
        r = add_opcode_rel_addr(reg, OP_JUMP,
@@ -1077,7 +1097,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
        r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
                                mod_tlen + SIZE_OP_JUMP);
        if (r) return r;
-       add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);
+       add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
        r = compile_tree_empty_check(qn->target, reg, empty_info);
        if (r) return r;
        r = add_opcode_rel_addr(reg, OP_JUMP,
@@ -1136,7 +1156,7 @@ compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
 
 static int
-compile_length_option_node(EffectNode* node, regex_t* reg)
+compile_length_option_node(EncloseNode* node, regex_t* reg)
 {
   int tlen;
   OnigOptionType prev = reg->options;
@@ -1156,7 +1176,7 @@ compile_length_option_node(EffectNode* node, regex_t* reg)
 }
 
 static int
-compile_option_node(EffectNode* node, regex_t* reg)
+compile_option_node(EncloseNode* node, regex_t* reg)
 {
   int r;
   OnigOptionType prev = reg->options;
@@ -1182,12 +1202,12 @@ compile_option_node(EffectNode* node, regex_t* reg)
 }
 
 static int
-compile_length_effect_node(EffectNode* node, regex_t* reg)
+compile_length_enclose_node(EncloseNode* node, regex_t* reg)
 {
   int len;
   int tlen;
 
-  if (node->type == EFFECT_OPTION)
+  if (node->type == ENCLOSE_OPTION)
     return compile_length_option_node(node, reg);
 
   if (node->target) {
@@ -1198,16 +1218,16 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
     tlen = 0;
 
   switch (node->type) {
-  case EFFECT_MEMORY:
+  case ENCLOSE_MEMORY:
 #ifdef USE_SUBEXP_CALL
-    if (IS_EFFECT_CALLED(node)) {
+    if (IS_ENCLOSE_CALLED(node)) {
       len = SIZE_OP_MEMORY_START_PUSH + tlen
          + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
-       len += (IS_EFFECT_RECURSION(node)
+       len += (IS_ENCLOSE_RECURSION(node)
                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
       else
-       len += (IS_EFFECT_RECURSION(node)
+       len += (IS_ENCLOSE_RECURSION(node)
                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
     }
     else
@@ -1223,9 +1243,9 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
     }
     break;
 
-  case EFFECT_STOP_BACKTRACK:
-    if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
-      QuantifierNode* qn = &NQUANTIFIER(node->target);
+  case ENCLOSE_STOP_BACKTRACK:
+    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+      QtfrNode* qn = NQTFR(node->target);
       tlen = compile_length_tree(qn->target, reg);
       if (tlen < 0) return tlen;
 
@@ -1248,17 +1268,17 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
 
 static int
-compile_effect_node(EffectNode* node, regex_t* reg)
+compile_enclose_node(EncloseNode* node, regex_t* reg)
 {
   int r, len;
 
-  if (node->type == EFFECT_OPTION)
+  if (node->type == ENCLOSE_OPTION)
     return compile_option_node(node, reg);
 
   switch (node->type) {
-  case EFFECT_MEMORY:
+  case ENCLOSE_MEMORY:
 #ifdef USE_SUBEXP_CALL
-    if (IS_EFFECT_CALLED(node)) {
+    if (IS_ENCLOSE_CALLED(node)) {
       r = add_opcode(reg, OP_CALL);
       if (r) return r;
       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
@@ -1268,10 +1288,10 @@ compile_effect_node(EffectNode* node, regex_t* reg)
       len = compile_length_tree(node->target, reg);
       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
-       len += (IS_EFFECT_RECURSION(node)
+       len += (IS_ENCLOSE_RECURSION(node)
                ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
       else
-       len += (IS_EFFECT_RECURSION(node)
+       len += (IS_ENCLOSE_RECURSION(node)
                ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
 
       r = add_opcode_rel_addr(reg, OP_JUMP, len);
@@ -1288,12 +1308,12 @@ compile_effect_node(EffectNode* node, regex_t* reg)
     r = compile_tree(node->target, reg);
     if (r) return r;
 #ifdef USE_SUBEXP_CALL
-    if (IS_EFFECT_CALLED(node)) {
+    if (IS_ENCLOSE_CALLED(node)) {
       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
-       r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
+       r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
                             ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
       else
-       r = add_opcode(reg, (IS_EFFECT_RECURSION(node)
+       r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
                             ? OP_MEMORY_END_REC : OP_MEMORY_END));
 
       if (r) return r;
@@ -1313,9 +1333,9 @@ compile_effect_node(EffectNode* node, regex_t* reg)
     }
     break;
 
-  case EFFECT_STOP_BACKTRACK:
-    if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
-      QuantifierNode* qn = &NQUANTIFIER(node->target);
+  case ENCLOSE_STOP_BACKTRACK:
+    if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
+      QtfrNode* qn = NQTFR(node->target);
       r = compile_tree_n_times(qn->target, qn->lower, reg);
       if (r) return r;
 
@@ -1472,50 +1492,50 @@ compile_length_tree(Node* node, regex_t* reg)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     len = 0;
     do {
-      r = compile_length_tree(NCONS(node).left, reg);
+      r = compile_length_tree(NCAR(node), reg);
       if (r < 0) return r;
       len += r;
-    } while (IS_NOT_NULL(node = NCONS(node).right));
+    } while (IS_NOT_NULL(node = NCDR(node)));
     r = len;
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       int n;
 
       n = r = 0;
       do {
-       r += compile_length_tree(NCONS(node).left, reg);
+       r += compile_length_tree(NCAR(node), reg);
        n++;
-      } while (IS_NOT_NULL(node = NCONS(node).right));
+      } while (IS_NOT_NULL(node = NCDR(node)));
       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     if (NSTRING_IS_RAW(node))
-      r = compile_length_string_raw_node(&(NSTRING(node)), reg);
+      r = compile_length_string_raw_node(NSTR(node), reg);
     else
       r = compile_length_string_node(node, reg);
     break;
 
-  case N_CCLASS:
-    r = compile_length_cclass_node(&(NCCLASS(node)), reg);
+  case NT_CCLASS:
+    r = compile_length_cclass_node(NCCLASS(node), reg);
     break;
 
-  case N_CTYPE:
-  case N_ANYCHAR:
+  case NT_CTYPE:
+  case NT_CANY:
     r = SIZE_OPCODE;
     break;
 
-  case N_BACKREF:
+  case NT_BREF:
     {
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
 
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
       if (IS_BACKREF_NEST_LEVEL(br)) {
         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
@@ -1533,21 +1553,21 @@ compile_length_tree(Node* node, regex_t* reg)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
+  case NT_CALL:
     r = SIZE_OP_CALL;
     break;
 #endif
 
-  case N_QUANTIFIER:
-    r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
+  case NT_QTFR:
+    r = compile_length_quantifier_node(NQTFR(node), reg);
     break;
 
-  case N_EFFECT:
-    r = compile_length_effect_node(&NEFFECT(node), reg);
+  case NT_ENCLOSE:
+    r = compile_length_enclose_node(NENCLOSE(node), reg);
     break;
 
-  case N_ANCHOR:
-    r = compile_length_anchor_node(&(NANCHOR(node)), reg);
+  case NT_ANCHOR:
+    r = compile_length_anchor_node(NANCHOR(node), reg);
     break;
 
   default:
@@ -1565,59 +1585,61 @@ compile_tree(Node* node, regex_t* reg)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     do {
-      r = compile_tree(NCONS(node).left, reg);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = compile_tree(NCAR(node), reg);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       Node* x = node;
       len = 0;
       do {
-       len += compile_length_tree(NCONS(x).left, reg);
-       if (NCONS(x).right != NULL) {
+       len += compile_length_tree(NCAR(x), reg);
+       if (NCDR(x) != NULL) {
          len += SIZE_OP_PUSH + SIZE_OP_JUMP;
        }
-      } while (IS_NOT_NULL(x = NCONS(x).right));
+      } while (IS_NOT_NULL(x = NCDR(x)));
       pos = reg->used + len;  /* goal position */
 
       do {
-       len = compile_length_tree(NCONS(node).left, reg);
-       if (IS_NOT_NULL(NCONS(node).right)) {
+       len = compile_length_tree(NCAR(node), reg);
+       if (IS_NOT_NULL(NCDR(node))) {
          r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
          if (r) break;
        }
-       r = compile_tree(NCONS(node).left, reg);
+       r = compile_tree(NCAR(node), reg);
        if (r) break;
-       if (IS_NOT_NULL(NCONS(node).right)) {
+       if (IS_NOT_NULL(NCDR(node))) {
          len = pos - (reg->used + SIZE_OP_JUMP);
          r = add_opcode_rel_addr(reg, OP_JUMP, len);
          if (r) break;
        }
-      } while (IS_NOT_NULL(node = NCONS(node).right));
+      } while (IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     if (NSTRING_IS_RAW(node))
-      r = compile_string_raw_node(&(NSTRING(node)), reg);
+      r = compile_string_raw_node(NSTR(node), reg);
     else
       r = compile_string_node(node, reg);
     break;
 
-  case N_CCLASS:
-    r = compile_cclass_node(&(NCCLASS(node)), reg);
+  case NT_CCLASS:
+    r = compile_cclass_node(NCCLASS(node), reg);
     break;
 
-  case N_CTYPE:
+  case NT_CTYPE:
     {
       int op;
 
-      switch (NCTYPE(node).type) {
-      case CTYPE_WORD:            op = OP_WORD;           break;
-      case CTYPE_NOT_WORD:        op = OP_NOT_WORD;       break;
+      switch (NCTYPE(node)->ctype) {
+      case ONIGENC_CTYPE_WORD:
+       if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
+       else                         op = OP_WORD;
+       break;
       default:
        return ONIGERR_TYPE_BUG;
        break;
@@ -1626,20 +1648,20 @@ compile_tree(Node* node, regex_t* reg)
     }
     break;
 
-  case N_ANYCHAR:
+  case NT_CANY:
     if (IS_MULTILINE(reg->options))
       r = add_opcode(reg, OP_ANYCHAR_ML);
     else
       r = add_opcode(reg, OP_ANYCHAR);
     break;
 
-  case N_BACKREF:
+  case NT_BREF:
     {
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
 
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
       if (IS_BACKREF_NEST_LEVEL(br)) {
-       r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
+       r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
        if (r) return r;
        r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
        if (r) return r;
@@ -1681,7 +1703,7 @@ compile_tree(Node* node, regex_t* reg)
         }
        if (r) return r;
 
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
       add_bacref_mems:
 #endif
        r = add_length(reg, br->back_num);
@@ -1696,21 +1718,21 @@ compile_tree(Node* node, regex_t* reg)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    r = compile_call(&(NCALL(node)), reg);
+  case NT_CALL:
+    r = compile_call(NCALL(node), reg);
     break;
 #endif
 
-  case N_QUANTIFIER:
-    r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
+  case NT_QTFR:
+    r = compile_quantifier_node(NQTFR(node), reg);
     break;
 
-  case N_EFFECT:
-    r = compile_effect_node(&NEFFECT(node), reg);
+  case NT_ENCLOSE:
+    r = compile_enclose_node(NENCLOSE(node), reg);
     break;
 
-  case N_ANCHOR:
-    r = compile_anchor_node(&(NANCHOR(node)), reg);
+  case NT_ANCHOR:
+    r = compile_anchor_node(NANCHOR(node), reg);
     break;
 
   default:
@@ -1732,29 +1754,29 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
   Node* node = *plink;
 
   switch (NTYPE(node)) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     do {
-      r = noname_disable_map(&(NCONS(node).left), map, counter);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = noname_disable_map(&(NCAR(node)), map, counter);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
-      Node** ptarget = &(NQUANTIFIER(node).target);
+      Node** ptarget = &(NQTFR(node)->target);
       Node*  old = *ptarget;
       r = noname_disable_map(ptarget, map, counter);
-      if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
+      if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
        onig_reduce_nested_quantifier(node, *ptarget);
       }
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
-      if (en->type == EFFECT_MEMORY) {
-       if (IS_EFFECT_NAMED_GROUP(en)) {
+      EncloseNode* en = NENCLOSE(node);
+      if (en->type == ENCLOSE_MEMORY) {
+       if (IS_ENCLOSE_NAMED_GROUP(en)) {
          (*counter)++;
          map[en->regnum].new_val = *counter;
          en->regnum = *counter;
@@ -1784,7 +1806,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map)
 {
   int i, pos, n, old_num;
   int *backs;
-  BackrefNode* bn = &(NBACKREF(node));
+  BRefNode* bn = NBREF(node);
 
   if (! IS_BACKREF_NAME_REF(bn))
     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
@@ -1813,20 +1835,20 @@ renumber_by_map(Node* node, GroupNumRemap* map)
   int r = 0;
 
   switch (NTYPE(node)) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     do {
-      r = renumber_by_map(NCONS(node).left, map);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = renumber_by_map(NCAR(node), map);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
-  case N_QUANTIFIER:
-    r = renumber_by_map(NQUANTIFIER(node).target, map);
+  case NT_QTFR:
+    r = renumber_by_map(NQTFR(node)->target, map);
     break;
-  case N_EFFECT:
-    r = renumber_by_map(NEFFECT(node).target, map);
+  case NT_ENCLOSE:
+    r = renumber_by_map(NENCLOSE(node)->target, map);
     break;
 
-  case N_BACKREF:
+  case NT_BREF:
     r = renumber_node_backref(node, map);
     break;
 
@@ -1843,21 +1865,21 @@ numbered_ref_check(Node* node)
   int r = 0;
 
   switch (NTYPE(node)) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     do {
-      r = numbered_ref_check(NCONS(node).left);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = numbered_ref_check(NCAR(node));
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
-  case N_QUANTIFIER:
-    r = numbered_ref_check(NQUANTIFIER(node).target);
+  case NT_QTFR:
+    r = numbered_ref_check(NQTFR(node)->target);
     break;
-  case N_EFFECT:
-    r = numbered_ref_check(NEFFECT(node).target);
+  case NT_ENCLOSE:
+    r = numbered_ref_check(NENCLOSE(node)->target);
     break;
 
-  case N_BACKREF:
-    if (! IS_BACKREF_NAME_REF(&(NBACKREF(node))))
+  case NT_BREF:
+    if (! IS_BACKREF_NAME_REF(NBREF(node)))
       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
     break;
 
@@ -1876,7 +1898,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
   GroupNumRemap* map;
 
   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
-  CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
+  CHECK_NULL_RETURN_MEMERR(map);
   for (i = 1; i <= env->num_mem; i++) {
     map[i].new_val = 0;
   }
@@ -1914,12 +1936,12 @@ static int
 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
 {
   int i, offset;
-  EffectNode* en;
+  EncloseNode* en;
   AbsAddrType addr;
 
   for (i = 0; i < uslist->num; i++) {
-    en = &(NEFFECT(uslist->us[i].target));
-    if (! IS_EFFECT_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
+    en = NENCLOSE(uslist->us[i].target);
+    if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
     addr = en->call_addr;
     offset = uslist->us[i].offset;
 
@@ -1929,53 +1951,53 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
 }
 #endif
 
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
 static int
 quantifiers_memory_node_info(Node* node)
 {
   int r = 0;
 
   switch (NTYPE(node)) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     {
       int v;
       do {
-       v = quantifiers_memory_node_info(NCONS(node).left);
+       v = quantifiers_memory_node_info(NCAR(node));
        if (v > r) r = v;
-      } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+      } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    if (IS_CALL_RECURSION(&NCALL(node))) {
+  case NT_CALL:
+    if (IS_CALL_RECURSION(NCALL(node))) {
       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
     }
     else
-      r = quantifiers_memory_node_info(NCALL(node).target);
+      r = quantifiers_memory_node_info(NCALL(node)->target);
     break;
 #endif
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
       if (qn->upper != 0) {
        r = quantifiers_memory_node_info(qn->target);
       }
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
       switch (en->type) {
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
        return NQ_TARGET_IS_EMPTY_MEM;
        break;
 
-      case EFFECT_OPTION:
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_OPTION:
+      case ENCLOSE_STOP_BACKTRACK:
        r = quantifiers_memory_node_info(en->target);
        break;
       default:
@@ -1984,19 +2006,19 @@ quantifiers_memory_node_info(Node* node)
     }
     break;
 
-  case N_BACKREF:
-  case N_STRING:
-  case N_CTYPE:
-  case N_CCLASS:
-  case N_ANYCHAR:
-  case N_ANCHOR:
+  case NT_BREF:
+  case NT_STR:
+  case NT_CTYPE:
+  case NT_CCLASS:
+  case NT_CANY:
+  case NT_ANCHOR:
   default:
     break;
   }
 
   return r;
 }
-#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
+#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
 
 static int
 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
@@ -2006,12 +2028,12 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
 
   *min = 0;
   switch (NTYPE(node)) {
-  case N_BACKREF:
+  case NT_BREF:
     {
       int i;
       int* backs;
       Node** nodes = SCANENV_MEM_NODES(env);
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
       if (br->state & NST_RECURSION) break;
 
       backs = BACKREFS_P(br);
@@ -2028,62 +2050,57 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    if (IS_CALL_RECURSION(&NCALL(node))) {
-      EffectNode* en = &(NEFFECT(NCALL(node).target));
-      if (IS_EFFECT_MIN_FIXED(en))
+  case NT_CALL:
+    if (IS_CALL_RECURSION(NCALL(node))) {
+      EncloseNode* en = NENCLOSE(NCALL(node)->target);
+      if (IS_ENCLOSE_MIN_FIXED(en))
        *min = en->min_len;
     }
     else
-      r = get_min_match_length(NCALL(node).target, min, env);
+      r = get_min_match_length(NCALL(node)->target, min, env);
     break;
 #endif
 
-  case N_LIST:
+  case NT_LIST:
     do {
-      r = get_min_match_length(NCONS(node).left, &tmin, env);
+      r = get_min_match_length(NCAR(node), &tmin, env);
       if (r == 0) *min += tmin;
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       Node *x, *y;
       y = node;
       do {
-       x = NCONS(y).left;
+       x = NCAR(y);
        r = get_min_match_length(x, &tmin, env);
        if (r != 0) break;
        if (y == node) *min = tmin;
        else if (*min > tmin) *min = tmin;
-      } while (r == 0 && IS_NOT_NULL(y = NCONS(y).right));
+      } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     {
-      StrNode* sn = &(NSTRING(node));
+      StrNode* sn = NSTR(node);
       *min = sn->end - sn->s;
     }
     break;
 
-  case N_CTYPE:
-    switch (NCTYPE(node).type) {
-    case CTYPE_WORD:     *min = 1; break;
-    case CTYPE_NOT_WORD: *min = 1; break;
-    default:
-      break;
-    }
+  case NT_CTYPE:
+    *min = 1;
     break;
 
-  case N_CCLASS:
-  case N_ANYCHAR:
+  case NT_CCLASS:
+  case NT_CANY:
     *min = 1;
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
 
       if (qn->lower > 0) {
        r = get_min_match_length(qn->target, min, env);
@@ -2093,32 +2110,32 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
       switch (en->type) {
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
 #ifdef USE_SUBEXP_CALL
-       if (IS_EFFECT_MIN_FIXED(en))
+       if (IS_ENCLOSE_MIN_FIXED(en))
          *min = en->min_len;
        else {
          r = get_min_match_length(en->target, min, env);
          if (r == 0) {
            en->min_len = *min;
-           SET_EFFECT_STATUS(node, NST_MIN_FIXED);
+           SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
          }
        }
        break;
 #endif
-      case EFFECT_OPTION:
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_OPTION:
+      case ENCLOSE_STOP_BACKTRACK:
        r = get_min_match_length(en->target, min, env);
        break;
       }
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
   default:
     break;
   }
@@ -2134,51 +2151,43 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
 
   *max = 0;
   switch (NTYPE(node)) {
-  case N_LIST:
+  case NT_LIST:
     do {
-      r = get_max_match_length(NCONS(node).left, &tmax, env);
+      r = get_max_match_length(NCAR(node), &tmax, env);
       if (r == 0)
        *max = distance_add(*max, tmax);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_ALT:
+  case NT_ALT:
     do {
-      r = get_max_match_length(NCONS(node).left, &tmax, env);
+      r = get_max_match_length(NCAR(node), &tmax, env);
       if (r == 0 && *max < tmax) *max = tmax;
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_STRING:
+  case NT_STR:
     {
-      StrNode* sn = &(NSTRING(node));
+      StrNode* sn = NSTR(node);
       *max = sn->end - sn->s;
     }
     break;
 
-  case N_CTYPE:
-    switch (NCTYPE(node).type) {
-    case CTYPE_WORD:
-    case CTYPE_NOT_WORD:
-      *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
-      break;
-
-    default:
-      break;
-    }
+  case NT_CTYPE:
+    *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
     break;
 
-  case N_CCLASS:
-  case N_ANYCHAR:
+  case NT_CCLASS:
+  case NT_CANY:
     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
     break;
 
-  case N_BACKREF:
+  case NT_BREF:
     {
       int i;
       int* backs;
       Node** nodes = SCANENV_MEM_NODES(env);
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
       if (br->state & NST_RECURSION) {
        *max = ONIG_INFINITE_DISTANCE;
        break;
@@ -2194,17 +2203,17 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    if (! IS_CALL_RECURSION(&(NCALL(node))))
-      r = get_max_match_length(NCALL(node).target, max, env);
+  case NT_CALL:
+    if (! IS_CALL_RECURSION(NCALL(node)))
+      r = get_max_match_length(NCALL(node)->target, max, env);
     else
       *max = ONIG_INFINITE_DISTANCE;
     break;
 #endif
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
 
       if (qn->upper != 0) {
        r = get_max_match_length(qn->target, max, env);
@@ -2218,32 +2227,32 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
       switch (en->type) {
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
 #ifdef USE_SUBEXP_CALL
-       if (IS_EFFECT_MAX_FIXED(en))
+       if (IS_ENCLOSE_MAX_FIXED(en))
          *max = en->max_len;
        else {
          r = get_max_match_length(en->target, max, env);
          if (r == 0) {
            en->max_len = *max;
-           SET_EFFECT_STATUS(node, NST_MAX_FIXED);
+           SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
          }
        }
        break;
 #endif
-      case EFFECT_OPTION:
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_OPTION:
+      case ENCLOSE_STOP_BACKTRACK:
        r = get_max_match_length(en->target, max, env);
        break;
       }
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
   default:
     break;
   }
@@ -2264,22 +2273,22 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
   level++;
   *len = 0;
   switch (NTYPE(node)) {
-  case N_LIST:
+  case NT_LIST:
     do {
-      r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
+      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
       if (r == 0)
        *len = distance_add(*len, tlen);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       int tlen2;
       int varlen = 0;
 
-      r = get_char_length_tree1(NCONS(node).left, reg, &tlen, level);
-      while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)) {
-       r = get_char_length_tree1(NCONS(node).left, reg, &tlen2, level);
+      r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
+      while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
+       r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
        if (r == 0) {
          if (tlen != tlen2)
            varlen = 1;
@@ -2298,20 +2307,20 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     {
-      StrNode* sn = &(NSTRING(node));
+      StrNode* sn = NSTR(node);
       UChar *s = sn->s;
       while (s < sn->end) {
-       s += enc_len(reg->enc, s);
+       s += enclen(reg->enc, s);
        (*len)++;
       }
     }
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
       if (qn->lower == qn->upper) {
        r = get_char_length_tree1(qn->target, reg, &tlen, level);
        if (r == 0)
@@ -2323,47 +2332,42 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    if (! IS_CALL_RECURSION(&(NCALL(node))))
-      r = get_char_length_tree1(NCALL(node).target, reg, len, level);
+  case NT_CALL:
+    if (! IS_CALL_RECURSION(NCALL(node)))
+      r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
     else
       r = GET_CHAR_LEN_VARLEN;
     break;
 #endif
 
-  case N_CTYPE:
-    switch (NCTYPE(node).type) {
-    case CTYPE_WORD:
-    case CTYPE_NOT_WORD:
-      *len = 1;
-      break;
-    }
+  case NT_CTYPE:
+    *len = 1;
     break;
 
-  case N_CCLASS:
-  case N_ANYCHAR:
+  case NT_CCLASS:
+  case NT_CANY:
     *len = 1;
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
       switch (en->type) {
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
 #ifdef USE_SUBEXP_CALL
-       if (IS_EFFECT_CLEN_FIXED(en))
+       if (IS_ENCLOSE_CLEN_FIXED(en))
          *len = en->char_len;
        else {
          r = get_char_length_tree1(en->target, reg, len, level);
          if (r == 0) {
            en->char_len = *len;
-           SET_EFFECT_STATUS(node, NST_CLEN_FIXED);
+           SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
          }
        }
        break;
 #endif
-      case EFFECT_OPTION:
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_OPTION:
+      case ENCLOSE_STOP_BACKTRACK:
        r = get_char_length_tree1(en->target, reg, len, level);
        break;
       default:
@@ -2372,7 +2376,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     break;
 
   default:
@@ -2401,29 +2405,18 @@ is_not_included(Node* x, Node* y, regex_t* reg)
  retry:
   ytype = NTYPE(y);
   switch (NTYPE(x)) {
-  case N_CTYPE:
+  case NT_CTYPE:
     {
       switch (ytype) {
-      case N_CTYPE:
-       switch (NCTYPE(x).type) {
-       case CTYPE_WORD:
-         if (NCTYPE(y).type == CTYPE_NOT_WORD)
-           return 1;
-         else
-           return 0;
-         break;
-       case CTYPE_NOT_WORD:
-         if (NCTYPE(y).type == CTYPE_WORD)
-           return 1;
-         else
-           return 0;
-         break;
-       default:
-         break;
-       }
+      case NT_CTYPE:
+       if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
+           NCTYPE(y)->not   != NCTYPE(x)->not)
+         return 1;
+       else
+         return 0;
        break;
 
-      case N_CCLASS:
+      case NT_CCLASS:
       swap:
        {
          Node* tmp;
@@ -2432,7 +2425,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
        }
        break;
 
-      case N_STRING:
+      case NT_STR:
        goto swap;
        break;
 
@@ -2442,37 +2435,39 @@ is_not_included(Node* x, Node* y, regex_t* reg)
     }
     break;
 
-  case N_CCLASS:
+  case NT_CCLASS:
     {
-      CClassNode* xc = &(NCCLASS(x));
+      CClassNode* xc = NCCLASS(x);
       switch (ytype) {
-      case N_CTYPE:
-       switch (NCTYPE(y).type) {
-       case CTYPE_WORD:
-         if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
-           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
-             if (BITSET_AT(xc->bs, i)) {
-               if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
+      case NT_CTYPE:
+       switch (NCTYPE(y)->ctype) {
+       case ONIGENC_CTYPE_WORD:
+         if (NCTYPE(y)->not == 0) {
+           if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
+             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+               if (BITSET_AT(xc->bs, i)) {
+                 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
+               }
              }
+             return 1;
            }
-           return 1;
+           return 0;
          }
-         return 0;
-         break;
-       case CTYPE_NOT_WORD:
-         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
-           if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
-             if (!IS_CCLASS_NOT(xc)) {
-               if (BITSET_AT(xc->bs, i))
-                 return 0;
-             }
-             else {
-               if (! BITSET_AT(xc->bs, i))
-                 return 0;
+         else {
+           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+             if (! IS_CODE_SB_WORD(reg->enc, i)) {
+               if (!IS_NCCLASS_NOT(xc)) {
+                 if (BITSET_AT(xc->bs, i))
+                   return 0;
+               }
+               else {
+                 if (! BITSET_AT(xc->bs, i))
+                   return 0;
+               }
              }
            }
+           return 1;
          }
-         return 1;
          break;
 
        default:
@@ -2480,29 +2475,29 @@ is_not_included(Node* x, Node* y, regex_t* reg)
        }
        break;
 
-      case N_CCLASS:
+      case NT_CCLASS:
        {
          int v;
-         CClassNode* yc = &(NCCLASS(y));
+         CClassNode* yc = NCCLASS(y);
 
          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
            v = BITSET_AT(xc->bs, i);
-           if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
-                (v == 0 && IS_CCLASS_NOT(xc))) {
+           if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
+                (v == 0 && IS_NCCLASS_NOT(xc))) {
              v = BITSET_AT(yc->bs, i);
-             if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
-                  (v == 0 && IS_CCLASS_NOT(yc)))
+             if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
+                  (v == 0 && IS_NCCLASS_NOT(yc)))
                return 0;
            }
          }
-         if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
-             (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
+         if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
+             (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
            return 1;
          return 0;
        }
        break;
 
-      case N_STRING:
+      case NT_STR:
        goto swap;
        break;
 
@@ -2512,30 +2507,30 @@ is_not_included(Node* x, Node* y, regex_t* reg)
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     {
-      StrNode* xs = &(NSTRING(x));
+      StrNode* xs = NSTR(x);
       if (NSTRING_LEN(x) == 0)
        break;
 
       c = *(xs->s);
       switch (ytype) {
-      case N_CTYPE:
-       switch (NCTYPE(y).type) {
-       case CTYPE_WORD:
-         return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 0 : 1);
-         break;
-       case CTYPE_NOT_WORD:
-         return (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end) ? 1 : 0);
+      case NT_CTYPE:
+       switch (NCTYPE(y)->ctype) {
+       case ONIGENC_CTYPE_WORD:
+         if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
+           return NCTYPE(y)->not;
+         else
+           return !(NCTYPE(y)->not);
          break;
        default:
          break;
        }
        break;
 
-      case N_CCLASS:
+      case NT_CCLASS:
        {
-         CClassNode* cc = &(NCCLASS(y));
+         CClassNode* cc = NCCLASS(y);
 
          code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
                                     xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
@@ -2543,10 +2538,10 @@ is_not_included(Node* x, Node* y, regex_t* reg)
        }
        break;
 
-      case N_STRING:
+      case NT_STR:
        {
          UChar *q;
-         StrNode* ys = &(NSTRING(y));
+         StrNode* ys = NSTR(y);
          len = NSTRING_LEN(x);
          if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
          if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
@@ -2580,40 +2575,34 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
   Node* n = NULL_NODE;
 
   switch (NTYPE(node)) {
-  case N_BACKREF:
-  case N_ALT:
-  case N_ANYCHAR:
+  case NT_BREF:
+  case NT_ALT:
+  case NT_CANY:
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
+  case NT_CALL:
 #endif
     break;
 
-  case N_CTYPE:
-  case N_CCLASS:
+  case NT_CTYPE:
+  case NT_CCLASS:
     if (exact == 0) {
       n = node;
     }
     break;
 
-  case N_LIST:
-    n = get_head_value_node(NCONS(node).left, exact, reg);
+  case NT_LIST:
+    n = get_head_value_node(NCAR(node), exact, reg);
     break;
 
-  case N_STRING:
+  case NT_STR:
     {
-      StrNode* sn = &(NSTRING(node));
+      StrNode* sn = NSTR(node);
 
       if (sn->end <= sn->s)
        break;
 
       if (exact != 0 &&
          !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
-#if 0
-        UChar* tmp = sn->s;
-       if (! ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag,
-                                       &tmp, sn->end))
-         n = node;
-#endif
       }
       else {
        n = node;
@@ -2621,9 +2610,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
     }
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
       if (qn->lower > 0) {
        if (IS_NOT_NULL(qn->head_exact))
          n = qn->head_exact;
@@ -2633,31 +2622,31 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
       switch (en->type) {
-      case EFFECT_OPTION:
+      case ENCLOSE_OPTION:
        {
          OnigOptionType options = reg->options;
 
-         reg->options = NEFFECT(node).option;
-         n = get_head_value_node(NEFFECT(node).target, exact, reg);
+         reg->options = NENCLOSE(node)->option;
+         n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
          reg->options = options;
        }
        break;
 
-      case EFFECT_MEMORY:
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_MEMORY:
+      case ENCLOSE_STOP_BACKTRACK:
        n = get_head_value_node(en->target, exact, reg);
        break;
       }
     }
     break;
 
-  case N_ANCHOR:
-    if (NANCHOR(node).type == ANCHOR_PREC_READ)
-      n = get_head_value_node(NANCHOR(node).target, exact, reg);
+  case NT_ANCHOR:
+    if (NANCHOR(node)->type == ANCHOR_PREC_READ)
+      n = get_head_value_node(NANCHOR(node)->target, exact, reg);
     break;
 
   default:
@@ -2668,45 +2657,46 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
 }
 
 static int
-check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
+check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
 {
   int type, r = 0;
 
   type = NTYPE(node);
-  if ((type & type_mask) == 0)
+  if ((NTYPE2BIT(type) & type_mask) == 0)
     return 1;
 
   switch (type) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     do {
-      r = check_type_tree(NCONS(node).left, type_mask, effect_mask, anchor_mask);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = check_type_tree(NCAR(node), type_mask, enclose_mask,
+                         anchor_mask);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_QUANTIFIER:
-    r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
+  case NT_QTFR:
+    r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
                        anchor_mask);
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
-      if ((en->type & effect_mask) == 0)
+      EncloseNode* en = NENCLOSE(node);
+      if ((en->type & enclose_mask) == 0)
        return 1;
 
-      r = check_type_tree(en->target, type_mask, effect_mask, anchor_mask);
+      r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
     }
     break;
 
-  case N_ANCHOR:
-    type = NANCHOR(node).type;
+  case NT_ANCHOR:
+    type = NANCHOR(node)->type;
     if ((type & anchor_mask) == 0)
       return 1;
 
-    if (NANCHOR(node).target)
-      r = check_type_tree(NANCHOR(node).target,
-                         type_mask, effect_mask, anchor_mask);
+    if (NANCHOR(node)->target)
+      r = check_type_tree(NANCHOR(node)->target,
+                         type_mask, enclose_mask, anchor_mask);
     break;
 
   default:
@@ -2728,7 +2718,7 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     {
       Node *x;
       OnigDistance min;
@@ -2736,40 +2726,40 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
 
       x = node;
       do {
-       ret = subexp_inf_recursive_check(NCONS(x).left, env, head);
+       ret = subexp_inf_recursive_check(NCAR(x), env, head);
        if (ret < 0 || ret == RECURSION_INFINITE) return ret;
        r |= ret;
        if (head) {
-         ret = get_min_match_length(NCONS(x).left, &min, env);
+         ret = get_min_match_length(NCAR(x), &min, env);
          if (ret != 0) return ret;
          if (min != 0) head = 0;
        }
-      } while (IS_NOT_NULL(x = NCONS(x).right));
+      } while (IS_NOT_NULL(x = NCDR(x)));
     }
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       int ret;
       r = RECURSION_EXIST;
       do {
-       ret = subexp_inf_recursive_check(NCONS(node).left, env, head);
+       ret = subexp_inf_recursive_check(NCAR(node), env, head);
        if (ret < 0 || ret == RECURSION_INFINITE) return ret;
        r &= ret;
-      } while (IS_NOT_NULL(node = NCONS(node).right));
+      } while (IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
-  case N_QUANTIFIER:
-    r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
+  case NT_QTFR:
+    r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
     if (r == RECURSION_EXIST) {
-      if (NQUANTIFIER(node).lower == 0) r = 0;
+      if (NQTFR(node)->lower == 0) r = 0;
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     {
-      AnchorNode* an = &(NANCHOR(node));
+      AnchorNode* an = NANCHOR(node);
       switch (an->type) {
       case ANCHOR_PREC_READ:
       case ANCHOR_PREC_READ_NOT:
@@ -2781,19 +2771,19 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
     }
     break;
 
-  case N_CALL:
-    r = subexp_inf_recursive_check(NCALL(node).target, env, head);
+  case NT_CALL:
+    r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
     break;
 
-  case N_EFFECT:
-    if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+  case NT_ENCLOSE:
+    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
       return 0;
-    else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
     else {
-      SET_EFFECT_STATUS(node, NST_MARK2);
-      r = subexp_inf_recursive_check(NEFFECT(node).target, env, head);
-      CLEAR_EFFECT_STATUS(node, NST_MARK2);
+      SET_ENCLOSE_STATUS(node, NST_MARK2);
+      r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
+      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
     }
     break;
 
@@ -2812,20 +2802,20 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     do {
-      r = subexp_inf_recursive_check_trav(NCONS(node).left, env);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = subexp_inf_recursive_check_trav(NCAR(node), env);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_QUANTIFIER:
-    r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
+  case NT_QTFR:
+    r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     {
-      AnchorNode* an = &(NANCHOR(node));
+      AnchorNode* an = NANCHOR(node);
       switch (an->type) {
       case ANCHOR_PREC_READ:
       case ANCHOR_PREC_READ_NOT:
@@ -2837,15 +2827,15 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
 
-      if (IS_EFFECT_RECURSION(en)) {
-       SET_EFFECT_STATUS(node, NST_MARK1);
+      if (IS_ENCLOSE_RECURSION(en)) {
+       SET_ENCLOSE_STATUS(node, NST_MARK1);
        r = subexp_inf_recursive_check(en->target, env, 1);
        if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
-       CLEAR_EFFECT_STATUS(node, NST_MARK1);
+       CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
       }
       r = subexp_inf_recursive_check_trav(en->target, env);
     }
@@ -2862,25 +2852,23 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
 static int
 subexp_recursive_check(Node* node)
 {
-  int type;
   int r = 0;
 
-  type = NTYPE(node);
-  switch (type) {
-  case N_LIST:
-  case N_ALT:
+  switch (NTYPE(node)) {
+  case NT_LIST:
+  case NT_ALT:
     do {
-      r |= subexp_recursive_check(NCONS(node).left);
-    } while (IS_NOT_NULL(node = NCONS(node).right));
+      r |= subexp_recursive_check(NCAR(node));
+    } while (IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_QUANTIFIER:
-    r = subexp_recursive_check(NQUANTIFIER(node).target);
+  case NT_QTFR:
+    r = subexp_recursive_check(NQTFR(node)->target);
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     {
-      AnchorNode* an = &(NANCHOR(node));
+      AnchorNode* an = NANCHOR(node);
       switch (an->type) {
       case ANCHOR_PREC_READ:
       case ANCHOR_PREC_READ_NOT:
@@ -2892,20 +2880,20 @@ subexp_recursive_check(Node* node)
     }
     break;
 
-  case N_CALL:
-    r = subexp_recursive_check(NCALL(node).target);
+  case NT_CALL:
+    r = subexp_recursive_check(NCALL(node)->target);
     if (r != 0) SET_CALL_RECURSION(node);
     break;
 
-  case N_EFFECT:
-    if (IS_EFFECT_MARK2(&(NEFFECT(node))))
+  case NT_ENCLOSE:
+    if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
       return 0;
-    else if (IS_EFFECT_MARK1(&(NEFFECT(node))))
+    else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
       return 1; /* recursion */
     else {
-      SET_EFFECT_STATUS(node, NST_MARK2);
-      r = subexp_recursive_check(NEFFECT(node).target);
-      CLEAR_EFFECT_STATUS(node, NST_MARK2);
+      SET_ENCLOSE_STATUS(node, NST_MARK2);
+      r = subexp_recursive_check(NENCLOSE(node)->target);
+      CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
     }
     break;
 
@@ -2927,29 +2915,29 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
-  case N_ALT:
+  case NT_LIST:
+  case NT_ALT:
     {
       int ret;
       do {
-       ret = subexp_recursive_check_trav(NCONS(node).left, env);
+       ret = subexp_recursive_check_trav(NCAR(node), env);
        if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
        else if (ret < 0) return ret;
-      } while (IS_NOT_NULL(node = NCONS(node).right));
+      } while (IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
-  case N_QUANTIFIER:
-    r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
-    if (NQUANTIFIER(node).upper == 0) {
+  case NT_QTFR:
+    r = subexp_recursive_check_trav(NQTFR(node)->target, env);
+    if (NQTFR(node)->upper == 0) {
       if (r == FOUND_CALLED_NODE)
-       NQUANTIFIER(node).is_refered = 1;
+       NQTFR(node)->is_refered = 1;
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     {
-      AnchorNode* an = &(NANCHOR(node));
+      AnchorNode* an = NANCHOR(node);
       switch (an->type) {
       case ANCHOR_PREC_READ:
       case ANCHOR_PREC_READ_NOT:
@@ -2961,20 +2949,20 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
 
-      if (! IS_EFFECT_RECURSION(en)) {
-       if (IS_EFFECT_CALLED(en)) {
-         SET_EFFECT_STATUS(node, NST_MARK1);
+      if (! IS_ENCLOSE_RECURSION(en)) {
+       if (IS_ENCLOSE_CALLED(en)) {
+         SET_ENCLOSE_STATUS(node, NST_MARK1);
          r = subexp_recursive_check(en->target);
-         if (r != 0) SET_EFFECT_STATUS(node, NST_RECURSION);
-         CLEAR_EFFECT_STATUS(node, NST_MARK1);
+         if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
+         CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
        }
       }
       r = subexp_recursive_check_trav(en->target, env);
-      if (IS_EFFECT_CALLED(en))
+      if (IS_ENCLOSE_CALLED(en))
        r |= FOUND_CALLED_NODE;
     }
     break;
@@ -2994,46 +2982,33 @@ setup_subexp_call(Node* node, ScanEnv* env)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     do {
-      r = setup_subexp_call(NCONS(node).left, env);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = setup_subexp_call(NCAR(node), env);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_ALT:
+  case NT_ALT:
     do {
-      r = setup_subexp_call(NCONS(node).left, env);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = setup_subexp_call(NCAR(node), env);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_QUANTIFIER:
-    r = setup_subexp_call(NQUANTIFIER(node).target, env);
+  case NT_QTFR:
+    r = setup_subexp_call(NQTFR(node)->target, env);
     break;
-  case N_EFFECT:
-    r = setup_subexp_call(NEFFECT(node).target, env);
+  case NT_ENCLOSE:
+    r = setup_subexp_call(NENCLOSE(node)->target, env);
     break;
 
-  case N_CALL:
+  case NT_CALL:
     {
-      int n, num, *refs;
-      UChar *p;
-      CallNode* cn = &(NCALL(node));
+      CallNode* cn = NCALL(node);
       Node** nodes = SCANENV_MEM_NODES(env);
 
-#ifdef USE_NAMED_GROUP
-      n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
-#else
-      n = -1;
-#endif
-      if (n <= 0) {
-       /* name not found, check group number. (?*ddd) */
-       p = cn->name;
-       num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
-       if (num <= 0 || p != cn->name_end) {
-         onig_scan_env_set_error_string(env,
-                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
-         return ONIGERR_UNDEFINED_NAME_REFERENCE;
-       }
+      if (cn->group_num != 0) {
+       int gnum = cn->group_num;
+
 #ifdef USE_NAMED_GROUP
        if (env->num_named > 0 &&
            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
@@ -3041,38 +3016,53 @@ setup_subexp_call(Node* node, ScanEnv* env)
          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
        }
 #endif
-       if (num > env->num_mem) {
+       if (gnum > env->num_mem) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_GROUP_REFERENCE;
        }
-       cn->ref_num = num;
-       goto set_call_attr;
-      }
-      else if (n > 1) {
-       onig_scan_env_set_error_string(env,
-              ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
-       return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
-      }
-      else {
-       cn->ref_num = refs[0];
+
+#ifdef USE_NAMED_GROUP
       set_call_attr:
-       cn->target = nodes[cn->ref_num];
+#endif
+       cn->target = nodes[cn->group_num];
        if (IS_NULL(cn->target)) {
          onig_scan_env_set_error_string(env,
-                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+                ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_NAME_REFERENCE;
        }
-       SET_EFFECT_STATUS(cn->target, NST_CALLED);
-       BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
+       SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
+       BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
        cn->unset_addr_list = env->unset_addr_list;
       }
+#ifdef USE_NAMED_GROUP
+      else {
+       int *refs;
+
+       int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
+                                          &refs);
+       if (n <= 0) {
+         onig_scan_env_set_error_string(env,
+                ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+         return ONIGERR_UNDEFINED_NAME_REFERENCE;
+       }
+       else if (n > 1) {
+         onig_scan_env_set_error_string(env,
+           ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+         return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+       }
+       else {
+         cn->group_num = refs[0];
+         goto set_call_attr;
+       }
+      }
+#endif
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     {
-      AnchorNode* an = &(NANCHOR(node));
+      AnchorNode* an = NANCHOR(node);
 
       switch (an->type) {
       case ANCHOR_PREC_READ:
@@ -3100,30 +3090,29 @@ setup_subexp_call(Node* node, ScanEnv* env)
 static int
 divide_look_behind_alternatives(Node* node)
 {
-  Node tmp_node;
   Node *head, *np, *insert_node;
-  AnchorNode* an = &(NANCHOR(node));
+  AnchorNode* an = NANCHOR(node);
   int anc_type = an->type;
 
   head = an->target;
-  np = NCONS(head).left;
-  tmp_node = *node; *node = *head; *head = tmp_node;
-  NCONS(node).left = head;
-  NANCHOR(head).target = np;
+  np = NCAR(head);
+  swap_node(node, head);
+  NCAR(node) = head;
+  NANCHOR(head)->target = np;
 
   np = node;
-  while ((np = NCONS(np).right) != NULL_NODE) {
+  while ((np = NCDR(np)) != NULL_NODE) {
     insert_node = onig_node_new_anchor(anc_type);
-    CHECK_NULL_RETURN_VAL(insert_node, ONIGERR_MEMORY);
-    NANCHOR(insert_node).target = NCONS(np).left;
-    NCONS(np).left = insert_node;
+    CHECK_NULL_RETURN_MEMERR(insert_node);
+    NANCHOR(insert_node)->target = NCAR(np);
+    NCAR(np) = insert_node;
   }
 
   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
     np = node;
     do {
-      np->type = N_LIST;  /* alt -> list */
-    } while ((np = NCONS(np).right) != NULL_NODE);
+      SET_NTYPE(np, NT_LIST);  /* alt -> list */
+    } while ((np = NCDR(np)) != NULL_NODE);
   }
   return 0;
 }
@@ -3132,7 +3121,7 @@ static int
 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
 {
   int r, len;
-  AnchorNode* an = &(NANCHOR(node));
+  AnchorNode* an = NANCHOR(node);
 
   r = get_char_length_tree(an->target, reg, &len);
   if (r == 0)
@@ -3156,11 +3145,15 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
 
  retry:
   type = NTYPE(node);
-  if (type == N_QUANTIFIER) {
-    QuantifierNode* qn = &(NQUANTIFIER(node));
+  if (type == NT_QTFR) {
+    QtfrNode* qn = NQTFR(node);
     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
-#ifdef USE_QUANTIFIER_PEEK_NEXT
-      qn->next_head_exact = get_head_value_node(next_node, 1, reg);
+#ifdef USE_QTFR_PEEK_NEXT
+      Node* n = get_head_value_node(next_node, 1, reg);
+      /* '\0': for UTF-16BE etc... */
+      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
+       qn->next_head_exact = n;
+      }
 #endif
       /* automatic posseivation a*b ==> (?>a*)b */
       if (qn->lower <= 1) {
@@ -3171,20 +3164,20 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
          if (IS_NOT_NULL(x)) {
            y = get_head_value_node(next_node,  0, reg);
            if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
-             Node* en = onig_node_new_effect(EFFECT_STOP_BACKTRACK);
-             CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
-             SET_EFFECT_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
+             Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+             CHECK_NULL_RETURN_MEMERR(en);
+             SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
              swap_node(node, en);
-             NEFFECT(node).target = en;
+             NENCLOSE(node)->target = en;
            }
          }
        }
       }
     }
   }
-  else if (type == N_EFFECT) {
-    EffectNode* en = &(NEFFECT(node));
-    if (en->type == EFFECT_MEMORY) {
+  else if (type == NT_ENCLOSE) {
+    EncloseNode* en = NENCLOSE(node);
+    if (en->type == ENCLOSE_MEMORY) {
       node = en->target;
       goto retry;
     }
@@ -3194,100 +3187,318 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
 
 
 static int
-divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
-                             UChar* prev_start, UChar* prev,
-                             UChar* end, Node*** tailp, Node** root)
+update_string_node_case_fold(regex_t* reg, Node *node)
 {
-  UChar *tmp, *wp;
-  Node* snode;
+  UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+  UChar *sbuf, *ebuf, *sp;
+  int r, i, len, sbuf_size;
+  StrNode* sn = NSTR(node);
+
+  end = sn->end;
+  sbuf_size = (end - sn->s) * 2;
+  sbuf = (UChar* )xmalloc(sbuf_size);
+  CHECK_NULL_RETURN_MEMERR(sbuf);
+  ebuf = sbuf + sbuf_size;
 
-  if (prev_ambig != 0) {
-    tmp = prev_start;
-    wp  = prev_start;
-    while (tmp < prev) {
-      wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
-                                     &tmp, end, wp);
+  sp = sbuf;
+  p = sn->s;
+  while (p < end) {
+    len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
+    q = buf;
+    for (i = 0; i < len; i++) {
+      if (sp >= ebuf) {
+       sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
+       CHECK_NULL_RETURN_MEMERR(sbuf);
+       sp = sbuf + sbuf_size;
+       sbuf_size *= 2;
+       ebuf = sbuf + sbuf_size;
+      }
+
+      *sp++ = buf[i];
     }
-    snode = onig_node_new_str(prev_start, wp);
-    CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-    NSTRING_SET_AMBIG(snode);
-    if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
   }
-  else {
-    snode = onig_node_new_str(prev_start, prev);
-    CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+
+  r = onig_node_str_set(node, sbuf, sp);
+  if (r != 0) {
+    xfree(sbuf);
+    return r;
+  }
+
+  xfree(sbuf);
+  return 0;
+}
+
+static int
+expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
+                                regex_t* reg)
+{
+  /* Build a case-folded string node for [s, end) and hand it back in *rnode. */
+  int status;
+  Node *rem = onig_node_new_str(s, end);
+
+  if (IS_NULL(rem)) return ONIGERR_MEMORY;
+
+  status = update_string_node_case_fold(reg, rem);
+  if (status == 0) {
+    NSTRING_SET_AMBIG(rem);
+    NSTRING_SET_DONT_GET_OPT_INFO(rem);
+    *rnode = rem;
+    return 0;
+  }
+
+  onig_node_free(rem);  /* folding failed: drop the half-built node, *rnode untouched */
+  return status;
+}
+
+static int
+expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
+                           UChar *p, int slen, UChar *end,
+                           regex_t* reg, Node **rnode)
+{
+  int r = ONIGERR_MEMORY, i, j, len, varlen;
+  Node *anode, *var_anode, *snode, *xnode, *an;
+  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+  /* Build in *rnode an alternation of the slen-byte char at p and its fold items. */
+  *rnode = var_anode = NULL_NODE;
+  /* Returns 1 if some item spans a byte length other than slen, 0 if none, <0 on error. */
+  varlen = 0;
+  for (i = 0; i < item_num; i++) {
+    if (items[i].byte_len != slen) {
+      varlen = 1;
+      break;
+    }
   }
 
-  if (*tailp == (Node** )0) {
-    *root = onig_node_new_list(snode, NULL);
-    CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY);
-    *tailp = &(NCONS(*root).right);
+  if (varlen != 0) {
+    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
+
+    xnode = onig_node_new_list(NULL, NULL);
+    if (IS_NULL(xnode)) goto mem_err;
+    NCAR(var_anode) = xnode;
+
+    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+    if (IS_NULL(anode)) goto mem_err;
+    NCAR(xnode) = anode;
   }
   else {
-    **tailp = onig_node_new_list(snode, NULL);
-    CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY);
-    *tailp = &(NCONS(**tailp).right);
+    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
+    if (IS_NULL(anode)) return ONIGERR_MEMORY;
   }
 
-  return 0;
+  snode = onig_node_new_str(p, p + slen);
+  if (IS_NULL(snode)) goto mem_err;
+  /* first alternative: the character exactly as written in the pattern */
+  NCAR(anode) = snode;
+
+  for (i = 0; i < item_num; i++) {
+    snode = onig_node_new_str(NULL, NULL);
+    if (IS_NULL(snode)) goto mem_err;
+    
+    for (j = 0; j < items[i].code_len; j++) {
+      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
+      if (len < 0) {
+       r = len;
+       goto mem_err2;
+      }
+
+      r = onig_node_str_cat(snode, buf, buf + len);
+      if (r != 0) goto mem_err2;
+    }
+
+    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
+    if (IS_NULL(an)) {
+      goto mem_err2;
+    }
+
+    if (items[i].byte_len != slen) {
+      Node *rem;
+      UChar *q = p + items[i].byte_len;
+
+      if (q < end) {
+       r = expand_case_fold_make_rem_string(&rem, q, end, reg);
+       if (r != 0) {
+         onig_node_free(an);
+         goto mem_err2;
+       }
+
+       xnode = onig_node_list_add(NULL_NODE, snode);
+       if (IS_NULL(xnode)) {
+         onig_node_free(an);
+         onig_node_free(rem);
+         goto mem_err2;
+       }
+       if (IS_NULL(onig_node_list_add(xnode, rem))) {
+         onig_node_free(an);
+         onig_node_free(xnode);
+         onig_node_free(rem);
+         goto mem_err;
+       }
+
+       NCAR(an) = xnode;
+      }
+      else {
+       NCAR(an) = snode;
+      }
+
+      NCDR(var_anode) = an;
+      var_anode = an;
+    }
+    else {
+      NCAR(an)     = snode;
+      NCDR(anode) = an;
+      anode = an;
+    }
+  }
+
+  return varlen;
+
+ mem_err2:
+  onig_node_free(snode);
+
+ mem_err:
+  onig_node_free(*rnode);
+  *rnode = NULL_NODE;  /* never leave the caller a pointer to the freed tree */
+  return (r < 0) ? r : ONIGERR_MEMORY;  /* keep a callee's error code; allocation failures map to MEMORY */
 }
 
 static int
-divide_ambig_string_node(Node* node, regex_t* reg)
+expand_case_fold_string(Node* node, regex_t* reg)
 {
-  StrNode* sn = &NSTRING(node);
-  int ambig, prev_ambig;
-  UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
-  Node *root = NULL_NODE;
-  Node **tailp = (Node** )0;
-  int r;
+#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
 
-  start = prev_start = p = sn->s;
-  end  = sn->end;
-  if (p >= end) return 0;
+  int r, n, len, alt_num;
+  UChar *start, *end, *p;
+  Node *top_root, *root, *snode, *prev_node;
+  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+  StrNode* sn = NSTR(node);
 
-  prev_ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end);
+  if (NSTRING_IS_AMBIG(node)) return 0;
 
+  start = sn->s;
+  end   = sn->end;
+  if (start >= end) return 0;
+
+  r = 0;
+  top_root = root = prev_node = snode = NULL_NODE;
+  alt_num = 1;
+  p = start;
   while (p < end) {
-    prev = p;
-    if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
-                                              reg->ambig_flag, &p, end))) {
+    n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
+                                          p, end, items);
+    if (n < 0) {
+      r = n;
+      goto err;
+    }
+
+    len = enclen(reg->enc, p);
+
+    if (n == 0) {
+      if (IS_NULL(snode)) {
+       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+         if (IS_NULL(root)) {
+           onig_node_free(prev_node);
+           goto mem_err;
+         }
+       }
 
-      r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
-                                       end, &tailp, &root);
-      if (r != 0) return r;
+       prev_node = snode = onig_node_new_str(NULL, NULL);
+       if (IS_NULL(snode)) goto mem_err;
+       if (IS_NOT_NULL(root)) {
+         if (IS_NULL(onig_node_list_add(root, snode))) {
+           onig_node_free(snode);
+           goto mem_err;
+         }
+       }
+      }
 
-      prev_ambig = ambig;
-      prev_start = prev;
+      r = onig_node_str_cat(snode, p, p + len);
+      if (r != 0) goto err;
     }
-  }
+    else {
+      alt_num *= (n + 1);
+      if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
+
+      if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
+       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+       if (IS_NULL(root)) {
+         onig_node_free(prev_node);
+         goto mem_err;
+       }
+      }
 
-  if (prev_start == start) {
-    if (prev_ambig != 0) {
-      NSTRING_SET_AMBIG(node);
-      tmp = start;
-      wp  = start;
-      while (tmp < end) {
-        wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
-                                       &tmp, end, wp);
+      r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
+      if (r < 0) goto mem_err;
+      if (r == 1) {
+       if (IS_NULL(root)) {
+         top_root = prev_node;
+       }
+       else {
+         if (IS_NULL(onig_node_list_add(root, prev_node))) {
+           onig_node_free(prev_node);
+           goto mem_err;
+         }
+       }
+
+       root = NCAR(prev_node);
+      }
+      else { /* r == 0 */
+       if (IS_NOT_NULL(root)) {
+         if (IS_NULL(onig_node_list_add(root, prev_node))) {
+           onig_node_free(prev_node);
+           goto mem_err;
+         }
+       }
       }
-      if (wp != sn->end) NSTRING_SET_AMBIG_REDUCE(node);
-      sn->end = wp;
+
+      snode = NULL_NODE;
     }
+
+    p += len;
   }
-  else {
-    r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
-                                     end, &tailp, &root);
-    if (r != 0) return r;
 
-    swap_node(node, root);
-    onig_node_str_clear(root); /* should be after swap! */
-    onig_node_free(root);      /* free original string node */
+  if (p < end) {
+    Node *srem;
+
+    r = expand_case_fold_make_rem_string(&srem, p, end, reg);
+    if (r != 0) goto mem_err;
+
+    if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
+      top_root = root = onig_node_list_add(NULL_NODE, prev_node);
+      if (IS_NULL(root)) {
+       onig_node_free(srem);
+       onig_node_free(prev_node);
+       goto mem_err;
+      }
+    }
+
+    if (IS_NULL(root)) {
+      prev_node = srem;
+    }
+    else {
+      if (IS_NULL(onig_node_list_add(root, srem))) {
+       onig_node_free(srem);
+       goto mem_err;
+      }
+    }
   }
 
+  /* ending */
+  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
+  swap_node(node, top_root);
+  onig_node_free(top_root);
   return 0;
+
+ mem_err:
+  r = ONIGERR_MEMORY;
+
+ err:
+  onig_node_free(top_root);
+  return r;
 }
 
+
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
 
 #define CEC_THRES_NUM_BIG_REPEAT         512
@@ -3305,31 +3516,31 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     {
       Node* prev = NULL_NODE;
       do {
-       r = setup_comb_exp_check(NCONS(node).left, r, env);
-       prev = NCONS(node).left;
-      } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+       r = setup_comb_exp_check(NCAR(node), r, env);
+       prev = NCAR(node);
+      } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       int ret;
       do {
-       ret = setup_comb_exp_check(NCONS(node).left, state, env);
+       ret = setup_comb_exp_check(NCAR(node), state, env);
        r |= ret;
-      } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
+      } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
       int child_state = state;
       int add_state = 0;
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
       Node* target = qn->target;
       int var_num;
 
@@ -3340,11 +3551,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
 
          /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
          if (env->backrefed_mem == 0) {
-           if (NTYPE(qn->target) == N_EFFECT) {
-             EffectNode* en = &(NEFFECT(qn->target));
-             if (en->type == EFFECT_MEMORY) {
-               if (NTYPE(en->target) == N_QUANTIFIER) {
-                 QuantifierNode* q = &(NQUANTIFIER(en->target));
+           if (NTYPE(qn->target) == NT_ENCLOSE) {
+             EncloseNode* en = NENCLOSE(qn->target);
+             if (en->type == ENCLOSE_MEMORY) {
+               if (NTYPE(en->target) == NT_QTFR) {
+                 QtfrNode* q = NQTFR(en->target);
                  if (IS_REPEAT_INFINITE(q->upper)
                      && q->greedy == qn->greedy) {
                    qn->upper = (qn->lower == 0 ? 1 : qn->lower);
@@ -3390,12 +3601,12 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
 
       switch (en->type) {
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
        {
          if (env->curr_max_regnum < en->regnum)
            env->curr_max_regnum = en->regnum;
@@ -3412,11 +3623,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    if (IS_CALL_RECURSION(&(NCALL(node))))
+  case NT_CALL:
+    if (IS_CALL_RECURSION(NCALL(node)))
       env->has_recursion = 1;
     else
-      r = setup_comb_exp_check(NCALL(node).target, state, env);
+      r = setup_comb_exp_check(NCALL(node)->target, state, env);
     break;
 #endif
 
@@ -3449,68 +3660,68 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     {
       Node* prev = NULL_NODE;
       do {
-       r = setup_tree(NCONS(node).left, reg, state, env);
+       r = setup_tree(NCAR(node), reg, state, env);
        if (IS_NOT_NULL(prev) && r == 0) {
-         r = next_setup(prev, NCONS(node).left, reg);
+         r = next_setup(prev, NCAR(node), reg);
        }
-       prev = NCONS(node).left;
-      } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+       prev = NCAR(node);
+      } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     }
     break;
 
-  case N_ALT:
+  case NT_ALT:
     do {
-      r = setup_tree(NCONS(node).left, reg, (state | IN_ALT), env);
-    } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
+      r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
+    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
     break;
 
-  case N_CCLASS:
+  case NT_CCLASS:
     break;
 
-  case N_STRING:
+  case NT_STR:
     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
-      r = divide_ambig_string_node(node, reg);
+      r = expand_case_fold_string(node, reg);
     }
     break;
 
-  case N_CTYPE:
-  case N_ANYCHAR:
+  case NT_CTYPE:
+  case NT_CANY:
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
+  case NT_CALL:
     break;
 #endif
 
-  case N_BACKREF:
+  case NT_BREF:
     {
       int i;
       int* p;
       Node** nodes = SCANENV_MEM_NODES(env);
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
       p = BACKREFS_P(br);
       for (i = 0; i < br->back_num; i++) {
        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
        BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
        BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
        if (IS_BACKREF_NEST_LEVEL(br)) {
          BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
        }
 #endif
-       SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+       SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
       }
     }
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
       OnigDistance d;
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
       Node* target = qn->target;
 
       if ((state & IN_REPEAT) != 0) {
@@ -3522,7 +3733,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
        if (r) break;
        if (d == 0) {
          qn->target_empty_info = NQ_TARGET_IS_EMPTY;
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
          r = quantifiers_memory_node_info(target);
          if (r < 0) break;
          if (r > 0) {
@@ -3535,7 +3746,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
            /*  ()* ==> ()?, ()+ ==> ()  */
            qn->upper = 1;
            if (qn->lower > 1) qn->lower = 1;
-           if (NTYPE(target) == N_STRING) {
+           if (NTYPE(target) == NT_STR) {
              qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
            }
          }
@@ -3551,29 +3762,29 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 
       /* expand string */
 #define EXPAND_STRING_MAX_LENGTH  100
-      if (NTYPE(target) == N_STRING) {
+      if (NTYPE(target) == NT_STR) {
        if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
            qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
          int len = NSTRING_LEN(target);
-         StrNode* sn = &(NSTRING(target));
+         StrNode* sn = NSTR(target);
 
          if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
            int i, n = qn->lower;
-           onig_node_conv_to_str_node(node, NSTRING(target).flag);
+           onig_node_conv_to_str_node(node, NSTR(target)->flag);
            for (i = 0; i < n; i++) {
              r = onig_node_str_cat(node, sn->s, sn->end);
              if (r) break;
            }
            onig_node_free(target);
-           break; /* break case N_QUANTIFIER: */
+           break; /* break case NT_QTFR: */
          }
        }
       }
 
 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
       if (qn->greedy && (qn->target_empty_info != 0)) {
-       if (NTYPE(target) == N_QUANTIFIER) {
-         QuantifierNode* tqn = &(NQUANTIFIER(target));
+       if (NTYPE(target) == NT_QTFR) {
+         QtfrNode* tqn = NQTFR(target);
          if (IS_NOT_NULL(tqn->head_exact)) {
            qn->head_exact  = tqn->head_exact;
            tqn->head_exact = NULL;
@@ -3587,39 +3798,39 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
 
       switch (en->type) {
-      case EFFECT_OPTION:
+      case ENCLOSE_OPTION:
        {
          OnigOptionType options = reg->options;
-         reg->options = NEFFECT(node).option;
-         r = setup_tree(NEFFECT(node).target, reg, state, env);
+         reg->options = NENCLOSE(node)->option;
+         r = setup_tree(NENCLOSE(node)->target, reg, state, env);
          reg->options = options;
        }
        break;
 
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
        if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
          BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
-         /* SET_EFFECT_STATUS(node, NST_MEM_IN_ALT_NOT); */
+         /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
        }
         r = setup_tree(en->target, reg, state, env);
         break;
 
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_STOP_BACKTRACK:
        {
          Node* target = en->target;
          r = setup_tree(target, reg, state, env);
-         if (NTYPE(target) == N_QUANTIFIER) {
-           QuantifierNode* tqn = &(NQUANTIFIER(target));
+         if (NTYPE(target) == NT_QTFR) {
+           QtfrNode* tqn = NQTFR(target);
            if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
                tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
              int qtype = NTYPE(tqn->target);
              if (IS_NODE_TYPE_SIMPLE(qtype))
-               SET_EFFECT_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
+               SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
            }
          }
        }
@@ -3628,9 +3839,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
     }
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     {
-      AnchorNode* an = &(NANCHOR(node));
+      AnchorNode* an = NANCHOR(node);
 
       switch (an->type) {
       case ANCHOR_PREC_READ:
@@ -3642,11 +3853,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 
 /* allowed node types in look-behind */
 #define ALLOWED_TYPE_IN_LB  \
-  ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
-    N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
+  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
+    BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
 
-#define ALLOWED_EFFECT_IN_LB       ( EFFECT_MEMORY )
-#define ALLOWED_EFFECT_IN_LB_NOT   0
+#define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
+#define ALLOWED_ENCLOSE_IN_LB_NOT   0
 
 #define ALLOWED_ANCHOR_IN_LB \
 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
@@ -3656,7 +3867,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
       case ANCHOR_LOOK_BEHIND:
        {
          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
-                             ALLOWED_EFFECT_IN_LB, ALLOWED_ANCHOR_IN_LB);
+                             ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
          if (r < 0) return r;
          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
          r = setup_look_behind(node, reg, env);
@@ -3668,7 +3879,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
       case ANCHOR_LOOK_BEHIND_NOT:
        {
          r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
-                     ALLOWED_EFFECT_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+                     ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
          if (r < 0) return r;
          if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
          r = setup_look_behind(node, reg, env);
@@ -3689,7 +3900,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 
 /* set skip map for Boyer-Moor search */
 static int
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
+set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
            UChar skip[], int** int_skip)
 {
   int i, len;
@@ -3722,11 +3933,11 @@ typedef struct {
 } MinMaxLen;
 
 typedef struct {
-  MinMaxLen       mmd;
-  OnigEncoding    enc;
-  OnigOptionType  options;
-  OnigAmbigType   ambig_flag;
-  ScanEnv*        scan_env;
+  MinMaxLen        mmd;  /* advanced with add_mml() as optimize_node_left() walks a list */
+  OnigEncoding     enc;  /* encoding handed to enclen() and the ONIGENC_* queries */
+  OnigOptionType   options;
+  OnigCaseFoldType case_fold_flag;  /* forwarded to add_char_amb_opt_map_info() for string nodes */
+  ScanEnv*         scan_env;
 } OptEnv;
 
 typedef struct {
@@ -3778,7 +3989,7 @@ map_position_value(OnigEncoding enc, int i)
      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
   };
 
-  if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
+  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
       return 20;
     else
@@ -3810,7 +4021,7 @@ distance_value(MinMaxLen* mm)
   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
 
   d = mm->max - mm->min;
-  if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
+  if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
     /* return dist_vals[d] * 16 / (mm->min + 12); */
     return (int )dist_vals[d];
   else
@@ -4003,7 +4214,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
   p = add->s;
   end = p + add->len;
   for (i = to->len; p < end; ) {
-    len = enc_len(enc, p);
+    len = enclen(enc, p);
     if (i + len > OPT_EXACT_MAXLEN) break;
     for (j = 0; j < len && p < end; j++)
       to->s[i++] = *p++;
@@ -4018,14 +4229,14 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
 }
 
 static void
-concat_opt_exact_info_str(OptExactInfo* to,
-                         UChar* s, UChar* end, int raw, OnigEncoding enc)
+concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
+                         int raw ARG_UNUSED, OnigEncoding enc)
 {
   int i, j, len;
   UChar *p;
 
   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
-    len = enc_len(enc, p);
+    len = enclen(enc, p);
     if (i + len > OPT_EXACT_MAXLEN) break;
     for (j = 0; j < len && p < end; j++)
       to->s[i++] = *p++;
@@ -4051,7 +4262,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
 
   for (i = 0; i < to->len && i < add->len; ) {
     if (to->s[i] != add->s[i]) break;
-    len = enc_len(env->enc, to->s + i);
+    len = enclen(env->enc, to->s + i);
 
     for (j = 1; j < len; j++) {
       if (to->s[i+j] != add->s[i+j]) break;
@@ -4146,29 +4357,23 @@ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
 
 static int
 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
-                          OnigEncoding enc, OnigAmbigType ambig_flag)
+                          OnigEncoding enc, OnigCaseFoldType case_fold_flag)
 {
-  int i, n, len;
-  UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
-  OnigCodePoint code;
-  const OnigPairAmbigCodes* pccs;
-  OnigAmbigType amb;
+  OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
+  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+  int i, n;
 
   add_char_opt_map_info(map, p[0], enc);
-  code = ONIGENC_MBC_TO_CODE(enc, p, end);
 
-  for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
-    if ((amb & ambig_flag) == 0)  continue;
+  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
+  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
+  if (n < 0) return n;
 
-    n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, amb, &pccs);
-    for (i = 0; i < n; i++) {
-      if (pccs[i].from == code) {
-        len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
-        if (len < 0) return len;
-        add_char_opt_map_info(map, buf[0], enc);
-      }
-    }
+  for (i = 0; i < n; i++) {
+    ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
+    add_char_opt_map_info(map, buf[0], enc);
   }
+
   return 0;
 }
 
@@ -4341,7 +4546,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
+  case NT_LIST:
     {
       OptEnv nenv;
       NodeOptInfo nopt;
@@ -4349,33 +4554,33 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
 
       copy_opt_env(&nenv, env);
       do {
-       r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
+       r = optimize_node_left(NCAR(nd), &nopt, &nenv);
        if (r == 0) {
          add_mml(&nenv.mmd, &nopt.len);
          concat_left_node_opt_info(env->enc, opt, &nopt);
        }
-      } while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
+      } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
     }
     break;
 
-  case N_ALT:
+  case NT_ALT:
     {
       NodeOptInfo nopt;
       Node* nd = node;
 
       do {
-       r = optimize_node_left(NCONS(nd).left, &nopt, env);
+       r = optimize_node_left(NCAR(nd), &nopt, env);
        if (r == 0) {
          if (nd == node) copy_node_opt_info(opt, &nopt);
          else            alt_merge_node_opt_info(opt, &nopt, env);
        }
-      } while ((r == 0) && IS_NOT_NULL(nd = NCONS(nd).right));
+      } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     {
-      StrNode* sn = &(NSTRING(node));
+      StrNode* sn = NSTR(node);
       int slen = sn->end - sn->s;
       int is_raw = NSTRING_IS_RAW(node);
 
@@ -4388,25 +4593,26 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
         set_mml(&opt->len, slen, slen);
       }
       else {
-        int n, max;
+        int max;
 
-        concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
-                                  is_raw, env->enc);
-        opt->exb.ignore_case = 1;
+       if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
+          int n = onigenc_strlen(env->enc, sn->s, sn->end);
+          max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+       }
+       else {
+         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
+                                   is_raw, env->enc);
+         opt->exb.ignore_case = 1;
+
+         if (slen > 0) {
+           r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
+                                         env->enc, env->case_fold_flag);
+           if (r != 0) break;
+         }
 
-       if (slen > 0) {
-          r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
-                                        env->enc, env->ambig_flag);
-          if (r != 0) break;
+         max = slen;
        }
 
-        if (NSTRING_IS_AMBIG_REDUCE(node)) {
-          n = onigenc_strlen(env->enc, sn->s, sn->end);
-          max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
-        }
-        else {
-          max = slen;
-        }
         set_mml(&opt->len, slen, max);
       }
 
@@ -4415,14 +4621,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     }
     break;
 
-  case N_CCLASS:
+  case NT_CCLASS:
     {
       int i, z;
-      CClassNode* cc = &(NCCLASS(node));
+      CClassNode* cc = NCCLASS(node);
 
       /* no need to check ignore case. (setted in setup_tree()) */
 
-      if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
+      if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
        OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
 
@@ -4431,7 +4637,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
       else {
         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
           z = BITSET_AT(cc->bs, i);
-          if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
+          if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
           }
         }
@@ -4440,7 +4646,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     }
     break;
 
-  case N_CTYPE:
+  case NT_CTYPE:
     {
       int i, min, max;
 
@@ -4449,21 +4655,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
       if (max == 1) {
         min = 1;
 
-       switch (NCTYPE(node).type) {
-       case CTYPE_NOT_WORD:
-          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
-            if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
-              add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
-            }
-          }
-          break;
-
-       case CTYPE_WORD:
-          for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
-            if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
-              add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
-            }
-          }
+       switch (NCTYPE(node)->ctype) {
+       case ONIGENC_CTYPE_WORD:
+         if (NCTYPE(node)->not != 0) {
+           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+             if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
+               add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+             }
+           }
+         }
+         else {
+           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+             if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
+               add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+             }
+           }
+         }
          break;
        }
       }
@@ -4474,7 +4681,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     }
     break;
 
-  case N_ANYCHAR:
+  case NT_CANY:
     {
       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
@@ -4482,22 +4689,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     }
     break;
 
-  case N_ANCHOR:
-    switch (NANCHOR(node).type) {
+  case NT_ANCHOR:
+    switch (NANCHOR(node)->type) {
     case ANCHOR_BEGIN_BUF:
     case ANCHOR_BEGIN_POSITION:
     case ANCHOR_BEGIN_LINE:
     case ANCHOR_END_BUF:
     case ANCHOR_SEMI_END_BUF:
     case ANCHOR_END_LINE:
-      add_opt_anc_info(&opt->anc, NANCHOR(node).type);
+      add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
       break;
 
     case ANCHOR_PREC_READ:
       {
        NodeOptInfo nopt;
 
-       r = optimize_node_left(NANCHOR(node).target, &nopt, env);
+       r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
        if (r == 0) {
          if (nopt.exb.len > 0)
            copy_opt_exact_info(&opt->expr, &nopt.exb);
@@ -4519,13 +4726,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     }
     break;
 
-  case N_BACKREF:
+  case NT_BREF:
     {
       int i;
       int* backs;
       OnigDistance min, max, tmin, tmax;
       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
 
       if (br->state & NST_RECURSION) {
        set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
@@ -4549,31 +4756,31 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
-    if (IS_CALL_RECURSION(&(NCALL(node))))
+  case NT_CALL:
+    if (IS_CALL_RECURSION(NCALL(node)))
       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
     else {
       OnigOptionType save = env->options;
-      env->options = NEFFECT(NCALL(node).target).option;
-      r = optimize_node_left(NCALL(node).target, opt, env);
+      env->options = NENCLOSE(NCALL(node)->target)->option;
+      r = optimize_node_left(NCALL(node)->target, opt, env);
       env->options = save;
     }
     break;
 #endif
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     {
       int i;
       OnigDistance min, max;
       NodeOptInfo nopt;
-      QuantifierNode* qn = &(NQUANTIFIER(node));
+      QtfrNode* qn = NQTFR(node);
 
       r = optimize_node_left(qn->target, &nopt, env);
       if (r) break;
 
       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
        if (env->mmd.max == 0 &&
-           NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
+           NTYPE(qn->target) == NT_CANY && qn->greedy) {
          if (IS_MULTILINE(env->options))
            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
          else
@@ -4585,7 +4792,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
          copy_node_opt_info(opt, &nopt);
          if (nopt.exb.len > 0) {
            if (nopt.exb.reach_end) {
-             for (i = 2; i < qn->lower &&
+             for (i = 2; i <= qn->lower &&
                          ! is_full_opt_exact_info(&opt->exb); i++) {
                concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
              }
@@ -4614,12 +4821,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     }
     break;
 
-  case N_EFFECT:
+  case NT_ENCLOSE:
     {
-      EffectNode* en = &(NEFFECT(node));
+      EncloseNode* en = NENCLOSE(node);
 
       switch (en->type) {
-      case EFFECT_OPTION:
+      case ENCLOSE_OPTION:
        {
          OnigOptionType save = env->options;
 
@@ -4629,7 +4836,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
        }
        break;
 
-      case EFFECT_MEMORY:
+      case ENCLOSE_MEMORY:
 #ifdef USE_SUBEXP_CALL
        en->opt_count++;
        if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
@@ -4637,8 +4844,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
 
          min = 0;
          max = ONIG_INFINITE_DISTANCE;
-         if (IS_EFFECT_MIN_FIXED(en)) min = en->min_len;
-         if (IS_EFFECT_MAX_FIXED(en)) max = en->max_len;
+         if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
+         if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
          set_mml(&opt->len, min, max);
        }
        else
@@ -4653,7 +4860,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
        }
        break;
 
-      case EFFECT_STOP_BACKTRACK:
+      case ENCLOSE_STOP_BACKTRACK:
        r = optimize_node_left(en->target, opt, env);
        break;
       }
@@ -4681,7 +4888,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
 
   if (e->ignore_case) {
     reg->exact = (UChar* )xmalloc(e->len);
-    CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(reg->exact);
     xmemcpy(reg->exact, e->s, e->len);
     reg->exact_end = reg->exact + e->len;
     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
@@ -4689,8 +4896,8 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
   else {
     int allow_reverse;
 
-    reg->exact = k_strdup(e->s, e->s + e->len);
-    CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
+    reg->exact = str_dup(e->s, e->s + e->len);
+    CHECK_NULL_RETURN_MEMERR(reg->exact);
     reg->exact_end = reg->exact + e->len;
  
     allow_reverse =
@@ -4755,9 +4962,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
   NodeOptInfo opt;
   OptEnv env;
 
-  env.enc        = reg->enc;
-  env.options    = reg->options;
-  env.ambig_flag = reg->ambig_flag;
+  env.enc            = reg->enc;
+  env.options        = reg->options;
+  env.case_fold_flag = reg->case_fold_flag;
   env.scan_env   = scan_env;
   clear_mml(&env.mmd);
 
@@ -4839,7 +5046,7 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,
        fputc((int )code, fp);
       }
 
-      p += enc_len(enc, p);
+      p += enclen(enc, p);
     }
   }
   else {
@@ -4971,19 +5178,21 @@ print_optimize_info(FILE* f, regex_t* reg)
 #endif /* ONIG_DEBUG */
 
 
-static void
+extern void
 onig_free_body(regex_t* reg)
 {
-  if (IS_NOT_NULL(reg->p))                xfree(reg->p);
-  if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
-  if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
-  if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
-  if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
-  if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
+  if (IS_NOT_NULL(reg)) {
+    if (IS_NOT_NULL(reg->p))                xfree(reg->p);
+    if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
+    if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
+    if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
+    if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
+    if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
 
 #ifdef USE_NAMED_GROUP
-  onig_names_free(reg);
+    onig_names_free(reg);
 #endif
+  }
 }
 
 extern void
@@ -5043,84 +5252,6 @@ onig_chain_reduce(regex_t* reg)
   }
 }
 
-#if 0
-extern int
-onig_clone(regex_t** to, regex_t* from)
-{
-  int r, size;
-  regex_t* reg;
-
-#ifdef USE_MULTI_THREAD_SYSTEM
-  if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
-    ONIG_STATE_INC(from);
-    if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
-      onig_chain_reduce(from);
-      ONIG_STATE_INC(from);
-    }
-  }
-  else {
-    int n = 0;
-    while (ONIG_STATE(from) < ONIG_STATE_NORMAL) {
-      if (++n > THREAD_PASS_LIMIT_COUNT)
-       return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
-      THREAD_PASS;
-    }
-    ONIG_STATE_INC(from);
-  }
-#endif /* USE_MULTI_THREAD_SYSTEM */
-
-  r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
-                      from->enc, ONIG_SYNTAX_DEFAULT);
-  if (r != 0) {
-    ONIG_STATE_DEC(from);
-    return r;
-  }
-
-  xmemcpy(reg, from, sizeof(onig_t));
-  reg->chain = (regex_t* )NULL;
-  reg->state = ONIG_STATE_NORMAL;
-
-  if (from->p) {
-    reg->p = (UChar* )xmalloc(reg->alloc);
-    if (IS_NULL(reg->p)) goto mem_error;
-    xmemcpy(reg->p, from->p, reg->alloc);
-  }
-
-  if (from->exact) {
-    reg->exact = (UChar* )xmalloc(from->exact_end - from->exact);
-    if (IS_NULL(reg->exact)) goto mem_error;
-    reg->exact_end = reg->exact + (from->exact_end - from->exact);
-    xmemcpy(reg->exact, from->exact, reg->exact_end - reg->exact);
-  }
-
-  if (from->int_map) {
-    size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
-    reg->int_map = (int* )xmalloc(size);
-    if (IS_NULL(reg->int_map)) goto mem_error;
-    xmemcpy(reg->int_map, from->int_map, size);
-  }
-
-  if (from->int_map_backward) {
-    size = sizeof(int) * ONIG_CHAR_TABLE_SIZE;
-    reg->int_map_backward = (int* )xmalloc(size);
-    if (IS_NULL(reg->int_map_backward)) goto mem_error;
-    xmemcpy(reg->int_map_backward, from->int_map_backward, size);
-  }
-
-#ifdef USE_NAMED_GROUP
-  reg->name_table = names_clone(from); /* names_clone is not implemented */
-#endif
-
-  ONIG_STATE_DEC(from);
-  *to = reg;
-  return 0;
-
- mem_error:
-  ONIG_STATE_DEC(from);
-  return ONIGERR_MEMORY;
-}
-#endif
-
 #ifdef ONIG_DEBUG
 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
 #endif
@@ -5141,6 +5272,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   UnsetAddrList  uslist;
 #endif
 
+  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+
   reg->state = ONIG_STATE_COMPILING;
 
 #ifdef ONIG_DEBUG
@@ -5182,10 +5315,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   }
 #endif
 
-#ifdef ONIG_DEBUG_PARSE_TREE
-  print_tree(stderr, root);
-#endif
-
 #ifdef USE_SUBEXP_CALL
   if (scan_env.num_call > 0) {
     r = unset_addr_list_init(&uslist, scan_env.num_call);
@@ -5207,6 +5336,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   r = setup_tree(root, reg, 0, &scan_env);
   if (r != 0) goto err_unset;
 
+#ifdef ONIG_DEBUG_PARSE_TREE
+  print_tree(stderr, root);
+#endif
+
   reg->capture_history  = scan_env.capture_history;
   reg->bt_mem_start     = scan_env.bt_mem_start;
   reg->bt_mem_start    |= reg->capture_history;
@@ -5308,7 +5441,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
     }
   }
 
-  if (IS_NOT_NULL(root)) onig_node_free(root);
+  onig_node_free(root);
   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
       xfree(scan_env.mem_nodes_dynamic);
   return r;
@@ -5338,12 +5471,16 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
 static int onig_inited = 0;
 
 extern int
-onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
-                OnigEncoding enc, OnigSyntaxType* syntax)
+onig_reg_init(regex_t* reg, OnigOptionType option,
+             OnigCaseFoldType case_fold_flag,
+             OnigEncoding enc, OnigSyntaxType* syntax)
 {
   if (! onig_inited)
     onig_init();
 
+  if (IS_NULL(reg))
+    return ONIGERR_INVALID_ARGUMENT;
+
   if (ONIGENC_IS_UNDEF(enc))
     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
 
@@ -5352,9 +5489,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
   }
 
-  *reg = (regex_t* )xmalloc(sizeof(regex_t));
-  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
-  (*reg)->state = ONIG_STATE_MODIFY;
+  (reg)->state = ONIG_STATE_MODIFY;
 
   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
     option |= syntax->options;
@@ -5363,24 +5498,36 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
   else
     option |= syntax->options;
 
-  (*reg)->enc              = enc;
-  (*reg)->options          = option;
-  (*reg)->syntax           = syntax;
-  (*reg)->optimize         = 0;
-  (*reg)->exact            = (UChar* )NULL;
-  (*reg)->int_map          = (int* )NULL;
-  (*reg)->int_map_backward = (int* )NULL;
-  (*reg)->chain            = (regex_t* )NULL;
+  (reg)->enc              = enc;
+  (reg)->options          = option;
+  (reg)->syntax           = syntax;
+  (reg)->optimize         = 0;
+  (reg)->exact            = (UChar* )NULL;
+  (reg)->int_map          = (int* )NULL;
+  (reg)->int_map_backward = (int* )NULL;
+  (reg)->chain            = (regex_t* )NULL;
+
+  (reg)->p                = (UChar* )NULL;
+  (reg)->alloc            = 0;
+  (reg)->used             = 0;
+  (reg)->name_table       = (void* )NULL;
+
+  (reg)->case_fold_flag   = case_fold_flag;
+  return 0;
+}
 
-  (*reg)->p                = (UChar* )NULL;
-  (*reg)->alloc            = 0;
-  (*reg)->used             = 0;
-  (*reg)->name_table       = (void* )NULL;
+extern int
+onig_new_without_alloc(regex_t* reg, const UChar* pattern,
+          const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
+          OnigSyntaxType* syntax, OnigErrorInfo* einfo)
+{
+  int r;
 
-  (*reg)->ambig_flag       = ambig_flag;
-  (*reg)->ambig_flag      &= ONIGENC_SUPPORT_AMBIG_FLAG(enc);
+  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
+  if (r) return r;
 
-  return 0;
+  r = onig_compile(reg, pattern, pattern_end, einfo);
+  return r;
 }
 
 extern int
@@ -5390,33 +5537,35 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
 {
   int r;
 
-  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+  *reg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
 
-  r = onig_alloc_init(reg, option, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
-                      enc, syntax);
-  if (r) return r;
+  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
+  if (r) goto err;
 
   r = onig_compile(*reg, pattern, pattern_end, einfo);
   if (r) {
+  err:
     onig_free(*reg);
     *reg = NULL;
   }
   return r;
 }
 
+
 extern int
 onig_init(void)
 {
   if (onig_inited != 0)
     return 0;
 
-  onig_inited = 1;
-
   THREAD_SYSTEM_INIT;
   THREAD_ATOMIC_START;
 
+  onig_inited = 1;
+
   onigenc_init();
-  onigenc_set_default_caseconv_table((UChar* )0);
+  /* onigenc_set_default_caseconv_table((UChar* )0); */
 
 #ifdef ONIG_DEBUG_STATISTICS
   onig_statistics_init();
@@ -5430,8 +5579,6 @@ onig_init(void)
 extern int
 onig_end(void)
 {
-  extern int onig_free_shared_cclass_table(void);
-
   THREAD_ATOMIC_START;
 
 #ifdef ONIG_DEBUG_STATISTICS
@@ -5442,7 +5589,7 @@ onig_end(void)
   onig_free_shared_cclass_table();
 #endif
 
-#ifdef USE_RECYCLE_NODE
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
   onig_free_node_list();
 #endif
 
@@ -5453,6 +5600,64 @@ onig_end(void)
   return 0;
 }
 
+extern int
+onig_is_in_code_range(const UChar* p, OnigCodePoint code)
+{
+  OnigCodePoint n, *data;
+  OnigCodePoint low, high, x;
+
+  GET_CODE_POINT(n, p);
+  data = (OnigCodePoint* )p;
+  data++;
+
+  for (low = 0, high = n; low < high; ) {
+    x = (low + high) >> 1;
+    if (code > data[x * 2 + 1])
+      low = x + 1;
+    else
+      high = x;
+  }
+
+  return ((low < n && code >= data[low * 2]) ? 1 : 0);
+}
+
+extern int
+onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
+{
+  int found;
+
+  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+    if (IS_NULL(cc->mbuf)) {
+      found = 0;
+    }
+    else {
+      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+    }
+  }
+  else {
+    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
+  }
+
+  if (IS_NCCLASS_NOT(cc))
+    return !found;
+  else
+    return found;
+}
+
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+  int len;
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    len = 2;
+  }
+  else {
+    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
+  }
+  return onig_is_code_in_cc_len(len, code, cc);
+}
+
 
 #ifdef ONIG_DEBUG
 
@@ -5514,7 +5719,7 @@ OnigOpInfoType OnigOpInfo[] = {
   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
-  { OP_BACKREF_AT_LEVEL,    "backref_at_level",     ARG_SPECIAL },
+  { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
@@ -5706,7 +5911,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
       break;
 
     case OP_EXACT1_IC:
-      len = enc_len(enc, bp);
+      len = enclen(enc, bp);
       p_string(f, len, bp);
       bp += len;
       break;
@@ -5781,7 +5986,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
       }
       break;
 
-    case OP_BACKREF_AT_LEVEL:
+    case OP_BACKREF_WITH_LEVEL:
       {
        OnigOptionType option;
        LengthType level;
@@ -5889,27 +6094,27 @@ print_indent_tree(FILE* f, Node* node, int indent)
 
   type = NTYPE(node);
   switch (type) {
-  case N_LIST:
-  case N_ALT:
-    if (NTYPE(node) == N_LIST)
+  case NT_LIST:
+  case NT_ALT:
+    if (NTYPE(node) == NT_LIST)
       fprintf(f, "<list:%x>\n", (int )node);
     else
       fprintf(f, "<alt:%x>\n", (int )node);
 
-    print_indent_tree(f, NCONS(node).left, indent + add);
-    while (IS_NOT_NULL(node = NCONS(node).right)) {
+    print_indent_tree(f, NCAR(node), indent + add);
+    while (IS_NOT_NULL(node = NCDR(node))) {
       if (NTYPE(node) != type) {
        fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
        exit(0);
       }
-      print_indent_tree(f, NCONS(node).left, indent + add);
+      print_indent_tree(f, NCAR(node), indent + add);
     }
     break;
 
-  case N_STRING:
+  case NT_STR:
     fprintf(f, "<string%s:%x>",
            (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
-    for (p = NSTRING(node).s; p < NSTRING(node).end; p++) {
+    for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
       if (*p >= 0x20 && *p < 0x7f)
        fputc(*p, f);
       else {
@@ -5918,11 +6123,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
     }
     break;
 
-  case N_CCLASS:
+  case NT_CCLASS:
     fprintf(f, "<cclass:%x>", (int )node);
-    if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
-    if (NCCLASS(node).mbuf) {
-      BBuf* bbuf = NCCLASS(node).mbuf;
+    if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
+    if (NCCLASS(node)->mbuf) {
+      BBuf* bbuf = NCCLASS(node)->mbuf;
       for (i = 0; i < bbuf->used; i++) {
        if (i > 0) fprintf(f, ",");
        fprintf(f, "%0x", bbuf->p[i]);
@@ -5930,24 +6135,29 @@ print_indent_tree(FILE* f, Node* node, int indent)
     }
     break;
 
-  case N_CTYPE:
+  case NT_CTYPE:
     fprintf(f, "<ctype:%x> ", (int )node);
-    switch (NCTYPE(node).type) {
-    case CTYPE_WORD:            fputs("word",           f); break;
-    case CTYPE_NOT_WORD:        fputs("not word",       f); break;
+    switch (NCTYPE(node)->ctype) {
+    case ONIGENC_CTYPE_WORD:
+      if (NCTYPE(node)->not != 0)
+       fputs("not word",       f);
+      else
+       fputs("word",           f);
+      break;
+
     default:
       fprintf(f, "ERROR: undefined ctype.\n");
       exit(0);
     }
     break;
 
-  case N_ANYCHAR:
+  case NT_CANY:
     fprintf(f, "<anychar:%x>", (int )node);
     break;
 
-  case N_ANCHOR:
+  case NT_ANCHOR:
     fprintf(f, "<anchor:%x> ", (int )node);
-    switch (NANCHOR(node).type) {
+    switch (NANCHOR(node)->type) {
     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
@@ -5972,10 +6182,10 @@ print_indent_tree(FILE* f, Node* node, int indent)
     }
     break;
 
-  case N_BACKREF:
+  case NT_BREF:
     {
       int* p;
-      BackrefNode* br = &(NBACKREF(node));
+      BRefNode* br = NBREF(node);
       p = BACKREFS_P(br);
       fprintf(f, "<backref:%x>", (int )node);
       for (i = 0; i < br->back_num; i++) {
@@ -5986,33 +6196,33 @@ print_indent_tree(FILE* f, Node* node, int indent)
     break;
 
 #ifdef USE_SUBEXP_CALL
-  case N_CALL:
+  case NT_CALL:
     {
-      CallNode* cn = &(NCALL(node));
+      CallNode* cn = NCALL(node);
       fprintf(f, "<call:%x>", (int )node);
       p_string(f, cn->name_end - cn->name, cn->name);
     }
     break;
 #endif
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
-           NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
-           (NQUANTIFIER(node).greedy ? "" : "?"));
-    print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
+           NQTFR(node)->lower, NQTFR(node)->upper,
+           (NQTFR(node)->greedy ? "" : "?"));
+    print_indent_tree(f, NQTFR(node)->target, indent + add);
     break;
 
-  case N_EFFECT:
-    fprintf(f, "<effect:%x> ", (int )node);
-    switch (NEFFECT(node).type) {
-    case EFFECT_OPTION:
-      fprintf(f, "option:%d\n", NEFFECT(node).option);
-      print_indent_tree(f, NEFFECT(node).target, indent + add);
+  case NT_ENCLOSE:
+    fprintf(f, "<enclose:%x> ", (int )node);
+    switch (NENCLOSE(node)->type) {
+    case ENCLOSE_OPTION:
+      fprintf(f, "option:%d\n", NENCLOSE(node)->option);
+      print_indent_tree(f, NENCLOSE(node)->target, indent + add);
       break;
-    case EFFECT_MEMORY:
-      fprintf(f, "memory:%d", NEFFECT(node).regnum);
+    case ENCLOSE_MEMORY:
+      fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
       break;
-    case EFFECT_STOP_BACKTRACK:
+    case ENCLOSE_STOP_BACKTRACK:
       fprintf(f, "stop-bt");
       break;
 
@@ -6020,7 +6230,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
       break;
     }
     fprintf(f, "\n");
-    print_indent_tree(f, NEFFECT(node).target, indent + add);
+    print_indent_tree(f, NENCLOSE(node)->target, indent + add);
     break;
 
   default:
@@ -6028,8 +6238,8 @@ print_indent_tree(FILE* f, Node* node, int indent)
     break;
   }
 
-  if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
-      type != N_EFFECT)
+  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
+      type != NT_ENCLOSE)
     fprintf(f, "\n");
   fflush(f);
 }
index 958917e122686936f1f6f31efe186eb8e0ff6127..80903508b8c7ad5396caf5639491620ceadc90c2 100644 (file)
@@ -55,7 +55,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
 {
   UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
   if (p < s) {
-    p += enc_len(enc, p);
+    p += enclen(enc, p);
   }
   return p;
 }
@@ -68,7 +68,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
 
   if (p < s) {
     if (prev) *prev = (const UChar* )p;
-    p += enc_len(enc, p);
+    p += enclen(enc, p);
   }
   else {
     if (prev) *prev = (const UChar* )NULL; /* Sorry */
@@ -169,52 +169,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
   }
 }
 
-#ifndef ONIG_RUBY_M17N
-
-#ifndef NOT_RUBY
-
-#define USE_APPLICATION_TO_LOWER_CASE_TABLE
-
-const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
-  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0288, 0x0008, 0x0008,
-  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
-  0x0284, 0x01a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10e2, 0x01a0, 0x00a0, 0x00a8, 0x00a0, 0x00a0,
-  0x00a0, 0x00a0, 0x10a0, 0x10a0, 0x00a0, 0x10e2, 0x00a0, 0x01a0,
-  0x00a0, 0x10a0, 0x10e2, 0x01a0, 0x10a0, 0x10a0, 0x10a0, 0x01a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x00a0,
-  0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x14a2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x00a0,
-  0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
-};
-#endif
-
-const UChar* OnigEncAsciiToLowerCaseTable = (const UChar* )0;
-
-#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
-static const UChar BuiltInAsciiToLowerCaseTable[] = {
+const UChar OnigEncAsciiToLowerCaseTable[] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -248,7 +203,6 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = {
   '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
 };
-#endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
 
 #ifdef USE_UPPER_CASE_TABLE
 const UChar OnigEncAsciiToUpperCaseTable[256] = {
@@ -288,23 +242,22 @@ const UChar OnigEncAsciiToUpperCaseTable[256] = {
 #endif
 
 const unsigned short OnigEncAsciiCtypeTable[256] = {
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
-  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
-
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
+  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
+  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
+  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
+  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
+  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
+  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@@ -396,19 +349,10 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
 #endif
 
 extern void
-onigenc_set_default_caseconv_table(const UChar* table)
+onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
 {
-  if (table == (const UChar* )0) {
-#ifndef USE_APPLICATION_TO_LOWER_CASE_TABLE
-    table = BuiltInAsciiToLowerCaseTable;
-#else
-    return ;
-#endif
-  }
-
-  if (table != OnigEncAsciiToLowerCaseTable) {
-    OnigEncAsciiToLowerCaseTable = table;
-  }
+  /* nothing */
+  /* obsoleted. */
 }
 
 extern UChar*
@@ -417,7 +361,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
 }
 
-const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
   { 0x41, 0x61 },
   { 0x42, 0x62 },
   { 0x43, 0x63 },
@@ -443,157 +387,175 @@ const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
   { 0x57, 0x77 },
   { 0x58, 0x78 },
   { 0x59, 0x79 },
-  { 0x5a, 0x7a },
-
-  { 0x61, 0x41 },
-  { 0x62, 0x42 },
-  { 0x63, 0x43 },
-  { 0x64, 0x44 },
-  { 0x65, 0x45 },
-  { 0x66, 0x46 },
-  { 0x67, 0x47 },
-  { 0x68, 0x48 },
-  { 0x69, 0x49 },
-  { 0x6a, 0x4a },
-  { 0x6b, 0x4b },
-  { 0x6c, 0x4c },
-  { 0x6d, 0x4d },
-  { 0x6e, 0x4e },
-  { 0x6f, 0x4f },
-  { 0x70, 0x50 },
-  { 0x71, 0x51 },
-  { 0x72, 0x52 },
-  { 0x73, 0x53 },
-  { 0x74, 0x54 },
-  { 0x75, 0x55 },
-  { 0x76, 0x56 },
-  { 0x77, 0x57 },
-  { 0x78, 0x58 },
-  { 0x79, 0x59 },
-  { 0x7a, 0x5a }
+  { 0x5a, 0x7a }
 };
 
 extern int
-onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                       const OnigPairAmbigCodes** ccs)
+onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+                                 OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
-  }
-  else {
-    return 0;
+  OnigCodePoint code;
+  int i, r;
+
+  for (i = 0;
+       i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
+       i++) {
+    code = OnigAsciiLowerMap[i].to;
+    r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
+    if (r != 0) return r;
+
+    code = OnigAsciiLowerMap[i].from;
+    r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
+    if (r != 0) return r;
   }
-}
 
-extern int
-onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
-                                         const OnigCompAmbigCodes** ccs)
-{
   return 0;
 }
 
 extern int
-onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                            const OnigPairAmbigCodes** ccs)
+onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
+        const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
+        OnigCaseFoldCodeItem items[])
 {
-  static const OnigPairAmbigCodes cc[] = {
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
-
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
-
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
-
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
-  };
-
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
-    *ccs = OnigAsciiPairAmbigCodes;
-    return (sizeof(OnigAsciiPairAmbigCodes) / sizeof(OnigPairAmbigCodes));
+  if (0x41 <= *p && *p <= 0x5a) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p + 0x20);
+    return 1;
   }
-  else if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = cc;
-    return sizeof(cc) / sizeof(OnigPairAmbigCodes);
+  else if (0x61 <= *p && *p <= 0x7a) {
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p - 0x20);
+    return 1;
   }
   else
     return 0;
 }
 
+static int  /* reports a single fold to f: code 0xdf paired with the two-code sequence 0x73 0x73 */
+ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
+                      OnigApplyAllCaseFoldFunc f, void* arg)  /* arg is handed to f unchanged */
+{
+  static OnigCodePoint ss[] = { 0x73, 0x73 };  /* 's', 's' */
+
+  return (*f)((OnigCodePoint )0xdf, ss, 2, arg);  /* 2 = number of codes in ss; f's result is returned as-is */
+}
+
 extern int
-onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
-                                           const OnigCompAmbigCodes** ccs)
+onigenc_apply_all_case_fold_with_map(int map_size,
+    const OnigPairCaseFoldCodes map[],
+    int ess_tsett_flag, OnigCaseFoldType flag,
+    OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static const OnigCompAmbigCodes folds[] = {
-    { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
-  };
+  OnigCodePoint code;
+  int i, r;
+
+  r = onigenc_ascii_apply_all_case_fold(flag, f, arg);
+  if (r != 0) return r;
 
-  if (flag == ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) {
-    *ccs = folds;
-    return sizeof(folds) / sizeof(OnigCompAmbigCodes);
+  for (i = 0; i < map_size; i++) {
+    code = map[i].to;
+    r = (*f)(map[i].from, &code, 1, arg);
+    if (r != 0) return r;
+
+    code = map[i].from;
+    r = (*f)(map[i].to, &code, 1, arg);
+    if (r != 0) return r;
   }
-  else
-    return 0;
+
+  if (ess_tsett_flag != 0)
+    return ss_apply_all_case_fold(flag, f, arg);
+
+  return 0;
 }
 
 extern int
-onigenc_not_support_get_ctype_code_range(int ctype,
-                             const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
+onigenc_get_case_fold_codes_by_str_with_map(int map_size,  /* fills items[] with case-fold alternatives for the byte at p; returns how many were written (0, 1, 2 or 4) */
+    const OnigPairCaseFoldCodes map[],  /* extra from/to pairs, consulted only for bytes that are not ASCII letters or 0xdf */
+    int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,  /* nonzero ess_tsett_flag enables the 0xdf / "ss" handling */
+    const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
+{
+  if (0x41 <= *p && *p <= 0x5a) {  /* 'A'..'Z' */
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p + 0x20);  /* lowercase counterpart */
+    if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1  /* 'S' with a second byte available before end */
+       && (*(p+1) == 0x53 || *(p+1) == 0x73)) {  /* next byte is 'S' or 's' */
+      /* SS */
+      items[1].byte_len = 2;  /* this alternative covers two input bytes */
+      items[1].code_len = 1;
+      items[1].code[0] = (OnigCodePoint )0xdf;
+      return 2;
+    }
+    else
+      return 1;
+  }
+  else if (0x61 <= *p && *p <= 0x7a) {  /* 'a'..'z' */
+    items[0].byte_len = 1;
+    items[0].code_len = 1;
+    items[0].code[0] = (OnigCodePoint )(*p - 0x20);  /* uppercase counterpart */
+    if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1  /* 's' with a second byte available before end */
+       && (*(p+1) == 0x73 || *(p+1) == 0x53)) {  /* next byte is 's' or 'S' */
+      /* ss */
+      items[1].byte_len = 2;  /* this alternative covers two input bytes */
+      items[1].code_len = 1;
+      items[1].code[0] = (OnigCodePoint )0xdf;
+      return 2;
+    }
+    else
+      return 1;
+  }
+  else if (*p == 0xdf && ess_tsett_flag != 0) {  /* 0xdf: four two-code alternatives (ss, SS, sS, Ss), each for one input byte */
+    items[0].byte_len = 1;
+    items[0].code_len = 2;
+    items[0].code[0] = (OnigCodePoint )'s';
+    items[0].code[1] = (OnigCodePoint )'s';
+
+    items[1].byte_len = 1;
+    items[1].code_len = 2;
+    items[1].code[0] = (OnigCodePoint )'S';
+    items[1].code[1] = (OnigCodePoint )'S';
+
+    items[2].byte_len = 1;
+    items[2].code_len = 2;
+    items[2].code[0] = (OnigCodePoint )'s';
+    items[2].code[1] = (OnigCodePoint )'S';
+
+    items[3].byte_len = 1;
+    items[3].code_len = 2;
+    items[3].code[0] = (OnigCodePoint )'S';
+    items[3].code[1] = (OnigCodePoint )'s';
+
+    return 4;
+  }
+  else {
+    int i;
+
+    for (i = 0; i < map_size; i++) {  /* linear scan; the first pair with *p on either side wins */
+      if (*p == map[i].from) {
+       items[0].byte_len = 1;
+       items[0].code_len = 1;
+       items[0].code[0] = map[i].to;
+       return 1;
+      }
+      else if (*p == map[i].to) {
+       items[0].byte_len = 1;
+       items[0].code_len = 1;
+       items[0].code[0] = map[i].from;
+       return 1;
+      }
+    }
+  }
+
+  return 0;  /* no alternative for this byte */
+}
+
+
+extern int
+onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
+        OnigCodePoint* sb_out ARG_UNUSED,
+        const OnigCodePoint* ranges[] ARG_UNUSED)
 {
   return ONIG_NO_SUPPORT_CONFIG;
 }
@@ -609,57 +571,43 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
 
 /* for single byte encodings */
 extern int
-onigenc_ascii_mbc_to_normalize(OnigAmbigType flag, const UChar** p, const UChar*end,
-                               UChar* lower)
+onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
+           const UChar*end ARG_UNUSED, UChar* lower)
 {
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
-  }
-  else {
-    *lower = **p;
-  }
+  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
 
   (*p)++;
   return 1; /* return byte length of converted char to lower */
 }
 
+#if 0
 extern int
-onigenc_ascii_is_mbc_ambiguous(OnigAmbigType flag,
+onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
                               const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
   (*pp)++;
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
-  }
-  else {
-    return FALSE;
-  }
+  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
 }
+#endif
 
 extern int
-onigenc_single_byte_mbc_enc_len(const UChar* p)
+onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
 {
   return 1;
 }
 
 extern OnigCodePoint
-onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end)
+onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
 {
   return (OnigCodePoint )(*p);
 }
 
 extern int
-onigenc_single_byte_code_to_mbclen(OnigCodePoint code)
-{
-  return 1;
-}
-
-extern int
-onigenc_single_byte_code_to_mbc_first(OnigCodePoint code)
+onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED)
 {
-  return (code & 0xff);
+  return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE);
 }
 
 extern int
@@ -670,19 +618,22 @@ onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf)
 }
 
 extern UChar*
-onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s)
+onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
+                                         const UChar* s)
 {
   return (UChar* )s;
 }
 
 extern int
-onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end)
+onigenc_always_true_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
+                                            const UChar* end ARG_UNUSED)
 {
   return TRUE;
 }
 
 extern int
-onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end)
+onigenc_always_false_is_allowed_reverse_match(const UChar* s   ARG_UNUSED,
+                                             const UChar* end ARG_UNUSED)
 {
   return FALSE;
 }
@@ -693,7 +644,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
   int c, i, len;
   OnigCodePoint n;
 
-  len = enc_len(enc, p);
+  len = enclen(enc, p);
   n = (OnigCodePoint )(*p++);
   if (len == 1) return n;
 
@@ -706,54 +657,46 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
 }
 
 extern int
-onigenc_mbn_mbc_to_normalize(OnigEncoding enc, OnigAmbigType flag,
-                             const UChar** pp, const UChar* end, UChar* lower)
+onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
+                          const UChar** pp, const UChar* end ARG_UNUSED,
+                         UChar* lower)
 {
   int len;
   const UChar *p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-      *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
-    }
-    else {
-      *lower = *p;
-    }
+    *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
     (*pp)++;
     return 1;
   }
   else {
-    len = enc_len(enc, p);
-    if (lower != p) {
-      int i;
-      for (i = 0; i < len; i++) {
-       *lower++ = *p++;
-      }
+    int i;
+
+    len = enclen(enc, p);
+    for (i = 0; i < len; i++) {
+      *lower++ = *p++;
     }
     (*pp) += len;
     return len; /* return byte length of converted to lower char */
   }
 }
 
+#if 0
 extern int
-onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
+onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
                              const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
     (*pp)++;
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-      return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
-    }
-    else {
-      return FALSE;
-    }
+    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
   }
 
-  (*pp) += enc_len(enc, p);
+  (*pp) += enclen(enc, p);
   return FALSE;
 }
+#endif
 
 extern int
 onigenc_mb2_code_to_mbclen(OnigCodePoint code)
@@ -771,40 +714,6 @@ onigenc_mb4_code_to_mbclen(OnigCodePoint code)
   else return 1;
 }
 
-extern int
-onigenc_mb2_code_to_mbc_first(OnigCodePoint code)
-{
-  int first;
-
-  if ((code & 0xff00) != 0) {
-    first = (code >> 8) & 0xff;
-  }
-  else {
-    return (int )code;
-  }
-  return first;
-}
-
-extern int
-onigenc_mb4_code_to_mbc_first(OnigCodePoint code)
-{
-  int first;
-
-  if ((code & 0xff000000) != 0) {
-    first = (code >> 24) & 0xff;
-  }
-  else if ((code & 0xff0000) != 0) {
-    first = (code >> 16) & 0xff;
-  }
-  else if ((code & 0xff00) != 0) {
-    first = (code >>  8) & 0xff;
-  }
-  else {
-    return (int )code;
-  }
-  return first;
-}
-
 extern int
 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
 {
@@ -816,8 +725,8 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
   *p++ = (UChar )(code & 0xff);
 
 #if 1
-  if (enc_len(enc, buf) != (p - buf))
-    return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+  if (enclen(enc, buf) != (p - buf))
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
 #endif
   return p - buf;
 }
@@ -839,12 +748,46 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
   *p++ = (UChar )(code & 0xff);
 
 #if 1
-  if (enc_len(enc, buf) != (p - buf))
-    return ONIGENCERR_INVALID_WIDE_CHAR_VALUE;
+  if (enclen(enc, buf) != (p - buf))
+    return ONIGERR_INVALID_CODE_POINT_VALUE;
 #endif
   return p - buf;
 }
 
+extern int
+onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)  /* looks the name p..end up in the table below; returns its ctype or ONIGERR_INVALID_CHAR_PROPERTY_NAME */
+{
+  static PosixBracketEntryType PBS[] = {  /* entries are { name, ctype, name length } */
+    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
+    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
+    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
+    { (UChar* )"Word",   ONIGENC_CTYPE_WORD,   4 },
+    { (UChar* )NULL, -1, 0 }  /* terminator: the lookup loop stops at the NULL name */
+  };
+
+  PosixBracketEntryType *pb;
+  int len;
+
+  len = onigenc_strlen(enc, p, end);  /* NOTE(review): presumably a character count rather than a byte count -- confirm in onigenc_strlen */
+  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+    if (len == pb->len &&  /* length must equal the entry's before the names are compared */
+        onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
+      return pb->ctype;
+  }
+
+  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;  /* no table entry matched */
+}
+
 extern int
 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
@@ -852,8 +795,7 @@ onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
   if (code < 128)
     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
   else {
-    if ((ctype & (ONIGENC_CTYPE_WORD |
-                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
+    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
     }
   }
@@ -868,8 +810,7 @@ onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
   if (code < 128)
     return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
   else {
-    if ((ctype & (ONIGENC_CTYPE_WORD |
-                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
+    if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
     }
   }
@@ -891,138 +832,71 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
     if (x) return x;
 
     sascii++;
-    p += enc_len(enc, p);
+    p += enclen(enc, p);
   }
   return 0;
 }
 
-#else /* ONIG_RUBY_M17N */
-
-extern int
-onigenc_is_code_ctype(OnigEncoding enc, OnigCodePoint code, int ctype)
+/* Property management */
+static int
+resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
 {
-  switch (ctype) {
-  case ONIGENC_CTYPE_NEWLINE:
-    if (code == 0x0a) return 1;
-    break;
-
-  case ONIGENC_CTYPE_ALPHA:
-    return m17n_isalpha(enc, code);
-    break;
-  case ONIGENC_CTYPE_BLANK:
-    return ONIGENC_IS_CODE_BLANK(enc, (int )(code));
-    break;
-  case ONIGENC_CTYPE_CNTRL:
-    return m17n_iscntrl(enc, code);
-    break;
-  case ONIGENC_CTYPE_DIGIT:
-    return m17n_isdigit(enc, code);
-    break;
-  case ONIGENC_CTYPE_GRAPH:
-    return ONIGENC_IS_CODE_GRAPH(enc, (int )(code));
-    break;
-  case ONIGENC_CTYPE_LOWER:
-    return m17n_islower(enc, code);
-    break;
-  case ONIGENC_CTYPE_PRINT:
-    return m17n_isprint(enc, code);
-    break;
-  case ONIGENC_CTYPE_PUNCT:
-    return m17n_ispunct(enc, code);
-    break;
-  case ONIGENC_CTYPE_SPACE:
-    return m17n_isspace(enc, code);
-    break;
-  case ONIGENC_CTYPE_UPPER:
-    return m17n_isupper(enc, code);
-    break;
-  case ONIGENC_CTYPE_XDIGIT:
-    return m17n_isxdigit(enc, code);
-    break;
-  case ONIGENC_CTYPE_WORD:
-    return m17n_iswchar(enc, code);
-    break;
-  case ONIGENC_CTYPE_ASCII:
-    return (code < 128 ? TRUE : FALSE);
-    break;
-  case ONIGENC_CTYPE_ALNUM:
-    return m17n_isalnum(enc, code);
-    break;
-  default:
-    break;
+  int size;
+  const OnigCodePoint **list = *plist;
+
+  size = sizeof(OnigCodePoint*) * new_size;
+  if (IS_NULL(list)) {
+    list = (const OnigCodePoint** )xmalloc(size);
+  }
+  else {
+    list = (const OnigCodePoint** )xrealloc((void* )list, size);
   }
 
-  return 0;
-}
+  if (IS_NULL(list)) return ONIGERR_MEMORY;
 
-extern int
-onigenc_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
-{
-  int c, len;
+  *plist = list;
+  *psize = new_size;
 
-  m17n_mbcput(enc, code, buf);
-  c = m17n_firstbyte(enc, code);
-  len = enc_len(enc, c);
-  return len;
+  return 0;
 }
 
 extern int
-onigenc_mbc_to_lower(OnigEncoding enc, UChar* p, UChar* buf)
+onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
+     hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
+     int *psize)
 {
-  unsigned int c, low;
-
-  c   = m17n_codepoint(enc, p, p + enc_len(enc, *p));
-  low = m17n_tolower(enc, c);
-  m17n_mbcput(enc, low, buf);
+#define PROP_INIT_SIZE     16
 
-  return m17n_codelen(enc, low);
-}
+  int r;
 
-extern int
-onigenc_is_mbc_ambiguous(OnigEncoding enc, OnigAmbigType flag,
-                         UChar** pp, UChar* end)
-{
-  int len;
-  unsigned int c;
-  UChar* p = *pp;
+  if (*psize <= *pnum) {
+    int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
+    r = resize_property_list(new_size, plist, psize);
+    if (r != 0) return r;
+  }
 
-  len = enc_len(enc, *p);
-  (*pp) += len;
-  c = m17n_codepoint(enc, p, p + len);
+  (*plist)[*pnum] = prop;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
-    if (m17n_isupper(enc, c) || m17n_islower(enc, c))
-      return TRUE;
+  if (ONIG_IS_NULL(*table)) {
+    *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
+    if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
   }
 
-  return FALSE;
+  *pnum = *pnum + 1;
+  onig_st_insert_strend(*table, name, name + strlen((char* )name),
+                       (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
+  return 0;
 }
 
-extern UChar*
-onigenc_get_left_adjust_char_head(OnigEncoding enc, UChar* start, UChar* s)
+extern int
+onigenc_property_list_init(int (*f)(void))
 {
-  UChar *p;
-  int len;
+  int r;
 
-  if (s <= start) return s;
-  p = s;
+  THREAD_ATOMIC_START;
 
-  while (!m17n_islead(enc, *p) && p > start) p--;
-  while (p + (len = enc_len(enc, *p)) < s) {
-    p += len;
-  }
-  if (p + len == s) return s;
-  return p;
-}
+  r = f();
 
-extern int
-onigenc_is_allowed_reverse_match(OnigEncoding enc,
-                                const UChar* s, const UChar* end)
-{
-  return ONIGENC_IS_SINGLEBYTE(enc);
+  THREAD_ATOMIC_END;
+  return r;
 }
-
-extern void
-onigenc_set_default_caseconv_table(UChar* table) { }
-
-#endif /* ONIG_RUBY_M17N */
index 58ee3e7f22f0b3ae605adca7548ab9b4f8ef88c7..40963280dc70420519bb2677d00b06d592b9e0af 100644 (file)
@@ -4,7 +4,7 @@
   regenc.h -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * SUCH DAMAGE.
  */
 
-#ifndef RUBY_PLATFORM
+#ifndef PACKAGE
+/* PACKAGE is defined in config.h */
 #include "config.h"
 #endif
+
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+
 #include "oniguruma.h"
 
+typedef struct {  /* one case-fold pair; the *_with_map helpers in regenc.c use each pair in both directions */
+  OnigCodePoint from;
+  OnigCodePoint to;
+} OnigPairCaseFoldCodes;
+
+
 #ifndef NULL
 #define NULL   ((void* )0)
 #endif
 #define FALSE   0
 #endif
 
-/* error codes */
-#define ONIGENCERR_MEMORY                                         -5
-#define ONIGENCERR_TYPE_BUG                                       -6
-#define ONIGENCERR_INVALID_WIDE_CHAR_VALUE                      -400
-#define ONIGENCERR_TOO_BIG_WIDE_CHAR_VALUE                      -401
+#ifndef ARG_UNUSED  /* parameter annotation for arguments a function deliberately ignores */
+#if defined(__GNUC__)
+#  define ARG_UNUSED  __attribute__ ((unused))
+#else
+#  define ARG_UNUSED  /* expands to nothing when __GNUC__ is not defined */
+#endif
+#endif
 
 #define ONIG_IS_NULL(p)                    (((void*)(p)) == (void*)0)
 #define ONIG_IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)
 #define ONIG_CHECK_NULL_RETURN(p)          if (ONIG_IS_NULL(p)) return NULL
 #define ONIG_CHECK_NULL_RETURN_VAL(p,val)  if (ONIG_IS_NULL(p)) return (val)
 
-
-#ifdef ONIG_RUBY_M17N
-
-#define ONIG_ENCODING_INIT_DEFAULT            ONIG_ENCODING_UNDEF
-
-#else  /* ONIG_RUBY_M17N */
-
-#define USE_UNICODE_FULL_RANGE_CTYPE
-/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */
+#define enclen(enc,p)      ONIGENC_MBC_ENC_LEN(enc,p)  /* short alias for ONIGENC_MBC_ENC_LEN */
+
+/* character type bit flags: each is 1 shifted left by the matching ONIGENC_CTYPE_* value */
+#define BIT_CTYPE_NEWLINE  (1<< ONIGENC_CTYPE_NEWLINE)
+#define BIT_CTYPE_ALPHA    (1<< ONIGENC_CTYPE_ALPHA)
+#define BIT_CTYPE_BLANK    (1<< ONIGENC_CTYPE_BLANK)
+#define BIT_CTYPE_CNTRL    (1<< ONIGENC_CTYPE_CNTRL)
+#define BIT_CTYPE_DIGIT    (1<< ONIGENC_CTYPE_DIGIT)
+#define BIT_CTYPE_GRAPH    (1<< ONIGENC_CTYPE_GRAPH)
+#define BIT_CTYPE_LOWER    (1<< ONIGENC_CTYPE_LOWER)
+#define BIT_CTYPE_PRINT    (1<< ONIGENC_CTYPE_PRINT)
+#define BIT_CTYPE_PUNCT    (1<< ONIGENC_CTYPE_PUNCT)
+#define BIT_CTYPE_SPACE    (1<< ONIGENC_CTYPE_SPACE)
+#define BIT_CTYPE_UPPER    (1<< ONIGENC_CTYPE_UPPER)
+#define BIT_CTYPE_XDIGIT   (1<< ONIGENC_CTYPE_XDIGIT)
+#define BIT_CTYPE_WORD     (1<< ONIGENC_CTYPE_WORD)
+#define BIT_CTYPE_ALNUM    (1<< ONIGENC_CTYPE_ALNUM)
+#define BIT_CTYPE_ASCII    (1<< ONIGENC_CTYPE_ASCII)
+
+#define CTYPE_TO_BIT(ctype)  (1<<(ctype))  /* same shift for a run-time ctype value; used by ONIGENC_IS_ASCII_CODE_CTYPE */
+#define CTYPE_IS_WORD_GRAPH_PRINT(ctype) \
+  ((ctype) == ONIGENC_CTYPE_WORD || (ctype) == ONIGENC_CTYPE_GRAPH ||\
+   (ctype) == ONIGENC_CTYPE_PRINT)  /* compares ctype by value, not as a bit mask */
+
+
+typedef struct {  /* one row of a name -> ctype lookup table */
+  UChar    *name;  /* name text, e.g. "Alnum"; NULL in a table's terminating row */
+  int       ctype;  /* ONIGENC_CTYPE_* value returned for this name */
+  short int len;  /* length of name, compared before the text itself */
+} PosixBracketEntryType;
+
+
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+#define USE_UNICODE_PROPERTIES
+/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
 /* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* see Unicode.org UTF#18 */
 
+
 #define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII
 
 /* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]));
 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
 
+
 /* methods for single byte encoding */
-ONIG_EXTERN int onigenc_ascii_mbc_to_normalize P_((OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
-ONIG_EXTERN int onigenc_ascii_is_mbc_ambiguous P_((OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
 ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p));
 ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end));
 ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_single_byte_code_to_mbc_first P_((OnigCodePoint code));
 ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf));
 ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s));
 ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end));
@@ -92,37 +132,36 @@ ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s
 
 /* methods for multi byte encoding */
 ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_mbn_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end, UChar* lower));
-ONIG_EXTERN int onigenc_mbn_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, const UChar** p, const UChar* end));
+ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
 ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_mb2_code_to_mbc_first P_((OnigCodePoint code));
 ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
+ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
+ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
 ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
 ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
-ONIG_EXTERN int onigenc_mb4_code_to_mbc_first P_((OnigCodePoint code));
 ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
 ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
 
-ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** codes));
 
 /* in enc/unicode.c */
 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
+ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
+ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
+ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
+ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
+
 
+#define UTF16_IS_SURROGATE_FIRST(c)    (((c) & 0xfc) == 0xd8)
+#define UTF16_IS_SURROGATE_SECOND(c)   (((c) & 0xfc) == 0xdc)
 
 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
   OnigEncISO_8859_1_ToLowerCaseTable[c]
 #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \
   OnigEncISO_8859_1_ToUpperCaseTable[c]
-#define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
-  ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
 
 ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
 ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
-ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
-
-#endif /* is not ONIG_RUBY_M17N */
 
 ONIG_EXTERN int
 onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
@@ -133,15 +172,18 @@ onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
 extern int  onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
 
 ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;
-ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
+ONIG_EXTERN const UChar  OnigEncAsciiToLowerCaseTable[];
 ONIG_EXTERN const UChar  OnigEncAsciiToUpperCaseTable[];
 ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
 
+#define ONIGENC_IS_ASCII_CODE(code)  ((code) < 0x80)
 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
 #define ONIGENC_IS_ASCII_CODE_CTYPE(code,ctype) \
-  ((OnigEncAsciiCtypeTable[code] & ctype) != 0)
+  ((OnigEncAsciiCtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
 #define ONIGENC_IS_ASCII_CODE_CASE_AMBIG(code) \
-    ONIGENC_IS_ASCII_CODE_CTYPE(code, (ONIGENC_CTYPE_UPPER | ONIGENC_CTYPE_LOWER))
+ (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\
+  ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
+   
 
 #endif /* REGENC_H */
index d6ec91856d1f4cb31417f83193237b07482371d4..385e560d98d746a0b6074356eeb75212395208c0 100644 (file)
@@ -2,7 +2,7 @@
   regerror.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -85,9 +85,9 @@ onig_error_code_to_format(int code)
   case ONIGERR_END_PATTERN_AT_CONTROL:
     p = "end pattern at control"; break;
   case ONIGERR_META_CODE_SYNTAX:
-    p = "illegal meta-code syntax"; break;
+    p = "invalid meta-code syntax"; break;
   case ONIGERR_CONTROL_CODE_SYNTAX:
-    p = "illegal control-code syntax"; break;
+    p = "invalid control-code syntax"; break;
   case ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE:
     p = "char-class value at end of range"; break;
   case ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE:
@@ -142,8 +142,8 @@ onig_error_code_to_format(int code)
     p = "too big wide-char value"; break;
   case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
     p = "too long wide-char value"; break;
-  case ONIGERR_INVALID_WIDE_CHAR_VALUE:
-    p = "invalid wide-char value"; break;
+  case ONIGERR_INVALID_CODE_POINT_VALUE:
+    p = "invalid code point value"; break;
   case ONIGERR_EMPTY_GROUP_NAME:
     p = "group name is empty"; break;
   case ONIGERR_INVALID_GROUP_NAME:
@@ -182,6 +182,15 @@ onig_error_code_to_format(int code)
   return (UChar* )p;
 }
 
+static void sprint_byte(char* s, unsigned int v)
+{
+  sprintf(s, "%02x", (v & 0377));
+}
+
+static void sprint_byte_with_x(char* s, unsigned int v)
+{
+  sprintf(s, "\\x%02x", (v & 0377));
+}
 
 static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
                    UChar buf[], int buf_size, int *is_over)
@@ -196,10 +205,17 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
     while (p < end) {
       code = ONIGENC_MBC_TO_CODE(enc, p, end);
       if (code >= 0x80) {
-       if (len + 5 <= buf_size) {
-         sprintf((char* )(&(buf[len])), "\\%03o",
-                 (unsigned int)(code & 0377));
-         len += 5;
+       if (code > 0xffff && len + 10 <= buf_size) {
+         sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
+         sprint_byte((char*)(&(buf[len+4])),      (unsigned int)(code >> 16));
+         sprint_byte((char*)(&(buf[len+6])),      (unsigned int)(code >>  8));
+         sprint_byte((char*)(&(buf[len+8])),      (unsigned int)code);
+         len += 10;
+       }
+       else if (len + 6 <= buf_size) {
+         sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
+         sprint_byte((char*)(&(buf[len+4])),      (unsigned int)code);
+         len += 6;
        }
        else {
          break;
@@ -209,7 +225,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
        buf[len++] = (UChar )code;
       }
 
-      p += enc_len(enc, p);
+      p += enclen(enc, p);
       if (len >= buf_size) break;
     }
 
@@ -317,7 +333,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
   va_list args;
 
   va_init_list(args, fmt);
-  n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
+  n = xvsnprintf((char* )buf, bufsize, (const char* )fmt, args);
   va_end(args);
 
   need = (pat_end - pat) * 4 + 4;
@@ -328,17 +344,17 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
 
     p = pat;
     while (p < pat_end) {
-      if (*p == MC_ESC(enc)) {
+      if (*p == '\\') {
        *s++ = *p++;
-       len = enc_len(enc, p);
+       len = enclen(enc, p);
        while (len-- > 0) *s++ = *p++;
       }
       else if (*p == '/') {
-       *s++ = (unsigned char )MC_ESC(enc);
+       *s++ = (unsigned char )'\\';
        *s++ = *p++;
       }
       else if (ONIGENC_IS_MBC_HEAD(enc, p)) {
-        len = enc_len(enc, p);
+        len = enclen(enc, p);
         if (ONIGENC_MBC_MINLEN(enc) == 1) {
           while (len-- > 0) *s++ = *p++;
         }
@@ -346,7 +362,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
           int blen;
 
           while (len-- > 0) {
-            sprintf((char* )bs, "\\%03o", *p++ & 0377);
+           sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
             blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
             bp = bs;
             while (blen-- > 0) *s++ = *bp++;
@@ -355,7 +371,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
       }
       else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
-       sprintf((char* )bs, "\\%03o", *p++ & 0377);
+       sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
        len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
         bp = bs;
        while (len-- > 0) *s++ = *bp++;
index 918aa67aa88bd6565c9daa3e44ee20a28f2d0843..7430d7851491cbf8fe43027dde08b9f1191a6ee2 100644 (file)
@@ -2,7 +2,7 @@
   regexec.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 #include "regint.h"
 
+#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+
 #ifdef USE_CRNL_AS_LINE_TERMINATOR
 #define ONIGENC_IS_MBC_CRNL(enc,p,end) \
   (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
-   ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+   ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
 #endif
 
 #ifdef USE_CAPTURE_HISTORY
@@ -111,7 +113,7 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
         (OnigCaptureTreeNode** )xrealloc(parent->childs,
                                          sizeof(OnigCaptureTreeNode*) * n);
     }
-    CHECK_NULL_RETURN_VAL(parent->childs, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(parent->childs);
     for (i = parent->allocated; i < n; i++) {
       parent->childs[i] = (OnigCaptureTreeNode* )0;
     }
@@ -196,7 +198,7 @@ onig_region_resize(OnigRegion* region, int n)
   return 0;
 }
 
-extern int
+static int
 onig_region_resize_clear(OnigRegion* region, int n)
 {
   int r;
@@ -297,47 +299,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
 
 /** stack **/
 #define INVALID_STACK_INDEX   -1
-typedef long StackIndex;
-
-typedef struct _StackType {
-  unsigned int type;
-  union {
-    struct {
-      UChar *pcode;      /* byte code position */
-      UChar *pstr;       /* string position */
-      UChar *pstr_prev;  /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-      unsigned int state_check;
-#endif
-    } state;
-    struct {
-      int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
-      UChar *pcode;      /* byte code position (head of repeated target) */
-      int   num;         /* repeat id */
-    } repeat;
-    struct {
-      StackIndex si;     /* index of stack */
-    } repeat_inc;
-    struct {
-      int num;           /* memory num */
-      UChar *pstr;       /* start/end position */
-      /* Following information is setted, if this stack type is MEM-START */
-      StackIndex start;  /* prev. info (for backtrack  "(...)*" ) */
-      StackIndex end;    /* prev. info (for backtrack  "(...)*" ) */
-    } mem;
-    struct {
-      int num;           /* null check id */
-      UChar *pstr;       /* start position */
-    } null_check;
-#ifdef USE_SUBEXP_CALL
-    struct {
-      UChar *ret_addr;   /* byte code position */
-      int    num;        /* null check id */
-      UChar *pstr;       /* string position */
-    } call_frame;
-#endif
-  } u;
-} StackType;
 
 /* stack type */
 /* used by normal-POP */
@@ -365,22 +326,6 @@ typedef struct _StackType {
 #define STK_MASK_TO_VOID_TARGET    0x10ff
 #define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
 
-typedef struct {
-  void* stack_p;
-  int   stack_n;
-  OnigOptionType options;
-  OnigRegion*    region;
-  const UChar* start;   /* search start position (for \G: BEGIN_POSITION) */
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
-  UChar* best_s;
-#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-  void* state_check_buff;
-  int   state_check_buff_size;
-#endif
-} MatchArg;
-
 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
   (msa).stack_p  = (void* )0;\
@@ -388,14 +333,14 @@ typedef struct {
   (msa).region   = (arg_region);\
   (msa).start    = (arg_start);\
   (msa).best_len = ONIG_MISMATCH;\
-} while (0)
+} while(0)
 #else
 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
   (msa).stack_p  = (void* )0;\
   (msa).options  = (arg_option);\
   (msa).region   = (arg_region);\
   (msa).start    = (arg_start);\
-} while (0)
+} while(0)
 #endif
 
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
@@ -424,14 +369,14 @@ typedef struct {
     (msa).state_check_buff = (void* )0;\
     (msa).state_check_buff_size = 0;\
   }\
-} while (0)
+  } while(0)
 
 #define MATCH_ARG_FREE(msa) do {\
   if ((msa).stack_p) xfree((msa).stack_p);\
   if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
     if ((msa).state_check_buff) xfree((msa).state_check_buff);\
   }\
-} while (0);
+} while(0)
 #else
 #define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
 #define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)
@@ -442,15 +387,15 @@ typedef struct {
 #define STACK_INIT(alloc_addr, ptr_num, stack_num)  do {\
   if (msa->stack_p) {\
     alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
-    stk_alloc  = (StackType* )(msa->stack_p);\
+    stk_alloc  = (OnigStackType* )(msa->stack_p);\
     stk_base   = stk_alloc;\
     stk        = stk_base;\
     stk_end    = stk_base + msa->stack_n;\
   }\
   else {\
     alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
-                      + sizeof(StackType) * (stack_num));\
-    stk_alloc  = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
+                      + sizeof(OnigStackType) * (stack_num));\
+    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
     stk_base   = stk_alloc;\
     stk        = stk_base;\
     stk_end    = stk_base + (stack_num);\
@@ -480,11 +425,11 @@ onig_set_match_stack_limit_size(unsigned int size)
 }
 
 static int
-stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
-            StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
+stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
+            OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
 {
   unsigned int n;
-  StackType *x, *stk_base, *stk_end, *stk;
+  OnigStackType *x, *stk_base, *stk_end, *stk;
 
   stk_base = *arg_stk_base;
   stk_end  = *arg_stk_end;
@@ -492,12 +437,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 
   n = stk_end - stk_base;
   if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
-    x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
+    x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
     if (IS_NULL(x)) {
       STACK_SAVE;
       return ONIGERR_MEMORY;
     }
-    xmemcpy(x, stk_base, n * sizeof(StackType));
+    xmemcpy(x, stk_base, n * sizeof(OnigStackType));
     n *= 2;
   }
   else {
@@ -508,7 +453,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
       else
         n = MatchStackLimitSize;
     }
-    x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
+    x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
     if (IS_NULL(x)) {
       STACK_SAVE;
       return ONIGERR_MEMORY;
@@ -680,7 +625,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
       level--;\
     }\
   }\
-} while (0)
+} while(0)
 
 #define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
   int level = 0;\
@@ -698,7 +643,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
     }\
     k++;\
   }\
-} while (0)
+} while(0)
 
 #define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
   STACK_ENSURE(1);\
@@ -844,7 +789,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 } while(0)
 
 #define STACK_STOP_BT_END do {\
-  StackType *k = stk;\
+  OnigStackType *k = stk;\
   while (1) {\
     k--;\
     STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
@@ -859,7 +804,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 } while(0)
 
 #define STACK_NULL_CHECK(isnull,id,s) do {\
-  StackType* k = stk;\
+  OnigStackType* k = stk;\
   while (1) {\
     k--;\
     STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
@@ -874,7 +819,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 
 #define STACK_NULL_CHECK_REC(isnull,id,s) do {\
   int level = 0;\
-  StackType* k = stk;\
+  OnigStackType* k = stk;\
   while (1) {\
     k--;\
     STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
@@ -894,7 +839,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 } while(0)
 
 #define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
-  StackType* k = stk;\
+  OnigStackType* k = stk;\
   while (1) {\
     k--;\
     STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
@@ -934,7 +879,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 
 #define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
   int level = 0;\
-  StackType* k = stk;\
+  OnigStackType* k = stk;\
   while (1) {\
     k--;\
     STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
@@ -996,11 +941,11 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
     else if (k->type == STK_CALL_FRAME) level--;\
     else if (k->type == STK_RETURN)     level++;\
   }\
-} while (0)
+} while(0)
 
 #define STACK_RETURN(addr)  do {\
   int level = 0;\
-  StackType* k = stk;\
+  OnigStackType* k = stk;\
   while (1) {\
     k--;\
     STACK_BASE_CHECK(k, "STACK_RETURN"); \
@@ -1023,25 +968,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   }\
 } while(0)
 
-#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
-  if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
+  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
     goto fail; \
 } while(0)
 
-static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
+static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
                         UChar* s1, UChar** ps2, int mblen)
 {
-  UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
-  UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
-  UChar *p1, *p2, *end, *s2, *end2;
+  UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+  UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+  UChar *p1, *p2, *end1, *s2, *end2;
   int len1, len2;
 
   s2   = *ps2;
-  end  = s1 + mblen;
+  end1 = s1 + mblen;
   end2 = s2 + mblen;
-  while (s1 < end) {
-    len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
-    len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
+  while (s1 < end1) {
+    len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
+    len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
     if (len1 != len2) return 0;
     p1 = buf1;
     p2 = buf2;
@@ -1065,31 +1010,36 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
   }\
 } while(0)
 
-#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
-  if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
+  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
     is_fail = 1; \
   else \
     is_fail = 0; \
 } while(0)
 
 
-#define ON_STR_BEGIN(s)  ((s) == str)
-#define ON_STR_END(s)    ((s) == end)
-#define IS_EMPTY_STR     (str == end)
-
-#define DATA_ENSURE(n) \
-  if (s + (n) > end) goto fail
-
+#define IS_EMPTY_STR           (str == end)
+#define ON_STR_BEGIN(s)       ((s) == str)
+#define ON_STR_END(s)         ((s) == end)
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#define DATA_ENSURE_CHECK1     (s < right_range)
+#define DATA_ENSURE_CHECK(n)   (s + (n) <= right_range)
+#define DATA_ENSURE(n)         if (s + (n) > right_range) goto fail
+#else
+#define DATA_ENSURE_CHECK1     (s < end)
 #define DATA_ENSURE_CHECK(n)   (s + (n) <= end)
+#define DATA_ENSURE(n)         if (s + (n) > end) goto fail
+#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+
 
 #ifdef USE_CAPTURE_HISTORY
 static int
-make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
-                          StackType* stk_top, UChar* str, regex_t* reg)
+make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
+                          OnigStackType* stk_top, UChar* str, regex_t* reg)
 {
   int n, r;
   OnigCaptureTreeNode* child;
-  StackType* k = *kp;
+  OnigStackType* k = *kp;
 
   while (k < stk_top) {
     if (k->type == STK_MEM_START) {
@@ -1097,7 +1047,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
       if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
           BIT_STATUS_AT(reg->capture_history, n) != 0) {
         child = history_node_new();
-        CHECK_NULL_RETURN_VAL(child, ONIGERR_MEMORY);
+        CHECK_NULL_RETURN_MEMERR(child);
         child->group = n;
         child->beg = (int )(k->u.mem.pstr - str);
         r = history_tree_add_child(node, child);
@@ -1124,7 +1074,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
 }
 #endif
 
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
 static int mem_is_in_memp(int mem, int num, UChar* memp)
 {
   int i;
@@ -1138,13 +1088,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)
 }
 
 static int backref_match_at_nested_level(regex_t* reg
-        , StackType* top, StackType* stk_base
-        , int ignore_case, int ambig_flag
+        , OnigStackType* top, OnigStackType* stk_base
+        , int ignore_case, int case_fold_flag
         , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
 {
   UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
   int level;
-  StackType* k;
+  OnigStackType* k;
 
   level = 0;
   k = top;
@@ -1166,7 +1116,7 @@ static int backref_match_at_nested_level(regex_t* reg
            ss = *s;
 
            if (ignore_case != 0) {
-             if (string_cmp_ic(reg->enc, ambig_flag,
+             if (string_cmp_ic(reg->enc, case_fold_flag,
                                pstart, &ss, (int )(pend - pstart)) == 0)
                return 0; /* or goto next_mem; */
            }
@@ -1192,70 +1142,8 @@ static int backref_match_at_nested_level(regex_t* reg
 
   return 0;
 }
-#endif /* USE_BACKREF_AT_LEVEL */
-
-
-#ifdef RUBY_PLATFORM
-
-typedef struct {
-  int state;
-  regex_t*  reg;
-  MatchArg* msa;
-  StackType* stk_base;
-} TrapEnsureArg;
-
-static VALUE
-trap_ensure(VALUE arg)
-{
-  TrapEnsureArg* ta = (TrapEnsureArg* )arg;
-
-  if (ta->state == 0) { /* trap_exec() is not normal return */
-    ONIG_STATE_DEC_THREAD(ta->reg);
-    if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
-      xfree(ta->stk_base);
-
-    MATCH_ARG_FREE(*(ta->msa));
-  }
-
-  return Qnil;
-}
+#endif /* USE_BACKREF_WITH_LEVEL */
 
-static VALUE
-trap_exec(VALUE arg)
-{
-  TrapEnsureArg* ta;
-
-  rb_trap_exec();
-
-  ta = (TrapEnsureArg* )arg;
-  ta->state = 1; /* normal return */
-  return Qnil;
-}
-
-extern void
-onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
-{
-  VALUE arg;
-  TrapEnsureArg ta;
-
-  ta.state    = 0;
-  ta.reg      = reg;
-  ta.msa      = msa;
-  ta.stk_base = stk_base;
-  arg = (VALUE )(&ta);
-  rb_ensure(trap_exec, arg, trap_ensure, arg);
-}
-
-#define CHECK_INTERRUPT_IN_MATCH_AT do {\
-  if (rb_trap_pending) {\
-    if (! rb_prohibit_interrupt) {\
-      onig_exec_trap(reg, msa, stk_base);\
-    }\
-  }\
-} while (0)
-#else
-#define CHECK_INTERRUPT_IN_MATCH_AT
-#endif /* RUBY_PLATFORM */
 
 #ifdef ONIG_DEBUG_STATISTICS
 
@@ -1288,41 +1176,26 @@ static int OpCurr = OP_FINISH;
 static int OpPrevTarget = OP_FAIL;
 static int MaxStackDepth = 0;
 
-#define STAT_OP_IN(opcode) do {\
+#define MOP_IN(opcode) do {\
   if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
   OpCurr = opcode;\
   OpCounter[opcode]++;\
   GETTIME(ts);\
-} while (0)
+} while(0)
 
-#define STAT_OP_OUT do {\
+#define MOP_OUT do {\
   GETTIME(te);\
   OpTime[OpCurr] += TIMEDIFF(te, ts);\
-} while (0)
-
-#ifdef RUBY_PLATFORM
-
-/*
- * :nodoc:
- */
-static VALUE onig_stat_print(void)
-{
-  onig_print_statistics(stderr);
-  return Qnil;
-}
-#endif
+} while(0)
 
-extern void onig_statistics_init(void)
+extern void
+onig_statistics_init(void)
 {
   int i;
   for (i = 0; i < 256; i++) {
     OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
   }
   MaxStackDepth = 0;
-
-#ifdef RUBY_PLATFORM
-  rb_define_global_function("onig_stat_print", onig_stat_print, 0);
-#endif
 }
 
 extern void
@@ -1341,73 +1214,15 @@ onig_print_statistics(FILE* f)
   stk++;\
   if (stk - stk_base > MaxStackDepth) \
     MaxStackDepth = stk - stk_base;\
-} while (0)
+} while(0)
 
 #else
 #define STACK_INC     stk++
 
-#define STAT_OP_IN(opcode)
-#define STAT_OP_OUT
+#define MOP_IN(opcode)
+#define MOP_OUT
 #endif
 
-extern int
-onig_is_in_code_range(const UChar* p, OnigCodePoint code)
-{
-  OnigCodePoint n, *data;
-  OnigCodePoint low, high, x;
-
-  GET_CODE_POINT(n, p);
-  data = (OnigCodePoint* )p;
-  data++;
-
-  for (low = 0, high = n; low < high; ) {
-    x = (low + high) >> 1;
-    if (code > data[x * 2 + 1])
-      low = x + 1;
-    else
-      high = x;
-  }
-
-  return ((low < n && code >= data[low * 2]) ? 1 : 0);
-}
-
-static int
-is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
-{
-  int found;
-
-  if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
-    if (IS_NULL(cc->mbuf)) {
-      found = 0;
-    }
-    else {
-      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
-    }
-  }
-  else {
-    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
-  }
-
-  if (IS_CCLASS_NOT(cc))
-    return !found;
-  else
-    return found;
-}
-
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
-  int len;
-
-  if (ONIGENC_MBC_MINLEN(enc) > 1) {
-    len = 2;
-  }
-  else {
-    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
-  }
-  return is_code_in_cc(len, code, cc);
-}
-
 
 /* matching region of POSIX API */
 typedef int regoff_t;
@@ -1420,8 +1235,11 @@ typedef struct {
 /* match data(str - end) from position (sstart). */
 /* if sstart == str then set sprev to NULL. */
 static int
-match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
-        UChar* sprev, MatchArg* msa)
+match_at(regex_t* reg, const UChar* str, const UChar* end,
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+        const UChar* right_range,
+#endif
+        const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
 {
   static UChar FinishCode[] = { OP_FINISH };
 
@@ -1431,15 +1249,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
   RelAddrType addr;
   OnigOptionType option = reg->options;
   OnigEncoding encode = reg->enc;
-  OnigAmbigType ambig_flag = reg->ambig_flag;
+  OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
   UChar *s, *q, *sbegin;
   UChar *p = reg->p;
   char *alloca_base;
-  StackType *stk_alloc, *stk_base, *stk, *stk_end;
-  StackType *stkp; /* used as any purpose. */
-  StackIndex si;
-  StackIndex *repeat_stk;
-  StackIndex *mem_start_stk, *mem_end_stk;
+  OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+  OnigStackType *stkp; /* used as any purpose. */
+  OnigStackIndex si;
+  OnigStackIndex *repeat_stk;
+  OnigStackIndex *mem_start_stk, *mem_end_stk;
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
   int scv;
   unsigned char* state_check_buff = msa->state_check_buff;
@@ -1450,9 +1268,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
   STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
   pop_level = reg->stack_pop_level;
   num_mem = reg->num_mem;
-  repeat_stk = (StackIndex* )alloca_base;
+  repeat_stk = (OnigStackIndex* )alloca_base;
 
-  mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
+  mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
   mem_end_stk   = mem_start_stk + num_mem;
   mem_start_stk--; /* for index start from 1,
                      mem_start_stk[1]..mem_start_stk[num_mem] */
@@ -1480,13 +1298,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       fprintf(stderr, "%4d> \"", (int )(s - str));
       bp = buf;
       for (i = 0, q = s; i < 7 && q < end; i++) {
-       len = enc_len(encode, q);
+       len = enclen(encode, q);
        while (len-- > 0) *bp++ = *q++;
       }
       if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
       else         { xmemcpy(bp, "\"",    1); bp += 1; }
       *bp = 0;
-      fputs(buf, stderr);
+      fputs((char* )buf, stderr);
       for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
       onig_print_compiled_byte_code(stderr, p, NULL, encode);
       fprintf(stderr, "\n");
@@ -1495,7 +1313,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 
     sbegin = s;
     switch (*p++) {
-    case OP_END:  STAT_OP_IN(OP_END);
+    case OP_END:  MOP_IN(OP_END);
       n = s - sstart;
       if (n > best_len) {
        OnigRegion* region;
@@ -1512,7 +1330,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        best_len = n;
        region = msa->region;
        if (region) {
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
          if (IS_POSIX_REGION(msa->options)) {
            posix_regmatch_t* rmt = (posix_regmatch_t* )region;
 
@@ -1535,7 +1353,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
            }
          }
          else {
-#endif /* USE_POSIX_REGION_OPTION */
+#endif /* USE_POSIX_API_REGION_OPTION */
            region->beg[0] = sstart - str;
            region->end[0] = s      - str;
            for (i = 1; i <= num_mem; i++) {
@@ -1561,7 +1379,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 
               if (IS_NULL(region->history_root)) {
                 region->history_root = node = history_node_new();
-                CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
+                CHECK_NULL_RETURN_MEMERR(node);
               }
               else {
                 node = region->history_root;
@@ -1581,7 +1399,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
               }
            }
 #endif /* USE_CAPTURE_HISTORY */
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
          } /* else IS_POSIX_REGION() */
 #endif
        } /* if (region) */
@@ -1590,14 +1408,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
     end_best_len:
 #endif
-      STAT_OP_OUT;
+      MOP_OUT;
 
       if (IS_FIND_CONDITION(option)) {
        if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
          best_len = ONIG_MISMATCH;
          goto fail; /* for retry */
        }
-       if (IS_FIND_LONGEST(option) && s < end) {
+       if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
          goto fail; /* for retry */
        }
       }
@@ -1606,7 +1424,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       goto finish;
       break;
 
-    case OP_EXACT1:  STAT_OP_IN(OP_EXACT1);
+    case OP_EXACT1:  MOP_IN(OP_EXACT1);
 #if 0
       DATA_ENSURE(1);
       if (*p != *s) goto fail;
@@ -1615,19 +1433,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       if (*p != *s++) goto fail;
       DATA_ENSURE(0);
       p++;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_EXACT1_IC:  STAT_OP_IN(OP_EXACT1_IC);
+    case OP_EXACT1_IC:  MOP_IN(OP_EXACT1_IC);
       {
        int len;
-       UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+       UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
 
        DATA_ENSURE(1);
-        ss = s;
-        sp = p;
-
-       len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+       len = ONIGENC_MBC_CASE_FOLD(encode,
+                   /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+                   case_fold_flag,
+                   &s, end, lowbuf);
        DATA_ENSURE(0);
        q = lowbuf;
        while (len-- > 0) {
@@ -1637,21 +1455,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          p++; q++;
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_EXACT2:  STAT_OP_IN(OP_EXACT2);
+    case OP_EXACT2:  MOP_IN(OP_EXACT2);
       DATA_ENSURE(2);
       if (*p != *s) goto fail;
       p++; s++;
       if (*p != *s) goto fail;
       sprev = s;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACT3:  STAT_OP_IN(OP_EXACT3);
+    case OP_EXACT3:  MOP_IN(OP_EXACT3);
       DATA_ENSURE(3);
       if (*p != *s) goto fail;
       p++; s++;
@@ -1660,11 +1478,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       if (*p != *s) goto fail;
       sprev = s;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACT4:  STAT_OP_IN(OP_EXACT4);
+    case OP_EXACT4:  MOP_IN(OP_EXACT4);
       DATA_ENSURE(4);
       if (*p != *s) goto fail;
       p++; s++;
@@ -1675,11 +1493,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       if (*p != *s) goto fail;
       sprev = s;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACT5:  STAT_OP_IN(OP_EXACT5);
+    case OP_EXACT5:  MOP_IN(OP_EXACT5);
       DATA_ENSURE(5);
       if (*p != *s) goto fail;
       p++; s++;
@@ -1692,25 +1510,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       if (*p != *s) goto fail;
       sprev = s;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTN:  STAT_OP_IN(OP_EXACTN);
+    case OP_EXACTN:  MOP_IN(OP_EXACTN);
       GET_LENGTH_INC(tlen, p);
       DATA_ENSURE(tlen);
       while (tlen-- > 0) {
        if (*p++ != *s++) goto fail;
       }
       sprev = s - 1;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTN_IC:  STAT_OP_IN(OP_EXACTN_IC);
+    case OP_EXACTN_IC:  MOP_IN(OP_EXACTN_IC);
       {
        int len;
-       UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+       UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
 
        GET_LENGTH_INC(tlen, p);
        endp = p + tlen;
@@ -1718,35 +1536,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        while (p < endp) {
          sprev = s;
          DATA_ENSURE(1);
-          ss = s;
-          sp = p;
-
-         len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+         len = ONIGENC_MBC_CASE_FOLD(encode,
+                     /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+                     case_fold_flag,
+                     &s, end, lowbuf);
          DATA_ENSURE(0);
          q = lowbuf;
          while (len-- > 0) {
-           if (*p != *q) {
-              goto fail;
-            }
+           if (*p != *q) goto fail;
            p++; q++;
          }
        }
       }
 
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTMB2N1:  STAT_OP_IN(OP_EXACTMB2N1);
+    case OP_EXACTMB2N1:  MOP_IN(OP_EXACTMB2N1);
       DATA_ENSURE(2);
       if (*p != *s) goto fail;
       p++; s++;
       if (*p != *s) goto fail;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_EXACTMB2N2:  STAT_OP_IN(OP_EXACTMB2N2);
+    case OP_EXACTMB2N2:  MOP_IN(OP_EXACTMB2N2);
       DATA_ENSURE(4);
       if (*p != *s) goto fail;
       p++; s++;
@@ -1757,11 +1573,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       p++; s++;
       if (*p != *s) goto fail;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTMB2N3:  STAT_OP_IN(OP_EXACTMB2N3);
+    case OP_EXACTMB2N3:  MOP_IN(OP_EXACTMB2N3);
       DATA_ENSURE(6);
       if (*p != *s) goto fail;
       p++; s++;
@@ -1776,11 +1592,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       p++; s++;
       if (*p != *s) goto fail;
       p++; s++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTMB2N:  STAT_OP_IN(OP_EXACTMB2N);
+    case OP_EXACTMB2N:  MOP_IN(OP_EXACTMB2N);
       GET_LENGTH_INC(tlen, p);
       DATA_ENSURE(tlen * 2);
       while (tlen-- > 0) {
@@ -1790,11 +1606,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        p++; s++;
       }
       sprev = s - 2;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTMB3N:  STAT_OP_IN(OP_EXACTMB3N);
+    case OP_EXACTMB3N:  MOP_IN(OP_EXACTMB3N);
       GET_LENGTH_INC(tlen, p);
       DATA_ENSURE(tlen * 3);
       while (tlen-- > 0) {
@@ -1806,11 +1622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        p++; s++;
       }
       sprev = s - 3;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_EXACTMBN:  STAT_OP_IN(OP_EXACTMBN);
+    case OP_EXACTMBN:  MOP_IN(OP_EXACTMBN);
       GET_LENGTH_INC(tlen,  p);  /* mb-len */
       GET_LENGTH_INC(tlen2, p);  /* string len */
       tlen2 *= tlen;
@@ -1820,19 +1636,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        p++; s++;
       }
       sprev = s - tlen;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_CCLASS:  STAT_OP_IN(OP_CCLASS);
+    case OP_CCLASS:  MOP_IN(OP_CCLASS);
       DATA_ENSURE(1);
       if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
       p += SIZE_BITSET;
-      s += enc_len(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */
-      STAT_OP_OUT;
+      s += enclen(encode, s);   /* OP_CCLASS can match mb-code. \D, \S */
+      MOP_OUT;
       break;
 
-    case OP_CCLASS_MB:  STAT_OP_IN(OP_CCLASS_MB);
+    case OP_CCLASS_MB:  MOP_IN(OP_CCLASS_MB);
       if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
 
     cclass_mb:
@@ -1843,7 +1659,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        int mb_len;
 
        DATA_ENSURE(1);
-       mb_len = enc_len(encode, s);
+       mb_len = enclen(encode, s);
        DATA_ENSURE(mb_len);
        ss = s;
        s += mb_len;
@@ -1858,10 +1674,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 #endif
       }
       p += tlen;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_CCLASS_MIX:  STAT_OP_IN(OP_CCLASS_MIX);
+    case OP_CCLASS_MIX:  MOP_IN(OP_CCLASS_MIX);
       DATA_ENSURE(1);
       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
        p += SIZE_BITSET;
@@ -1876,18 +1692,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        p += tlen;
        s++;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_CCLASS_NOT:  STAT_OP_IN(OP_CCLASS_NOT);
+    case OP_CCLASS_NOT:  MOP_IN(OP_CCLASS_NOT);
       DATA_ENSURE(1);
       if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
       p += SIZE_BITSET;
-      s += enc_len(encode, s);
-      STAT_OP_OUT;
+      s += enclen(encode, s);
+      MOP_OUT;
       break;
 
-    case OP_CCLASS_MB_NOT:  STAT_OP_IN(OP_CCLASS_MB_NOT);
+    case OP_CCLASS_MB_NOT:  MOP_IN(OP_CCLASS_MB_NOT);
       DATA_ENSURE(1);
       if (! ONIGENC_IS_MBC_HEAD(encode, s)) {
        s++;
@@ -1901,9 +1717,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       {
        OnigCodePoint code;
        UChar *ss;
-       int mb_len = enc_len(encode, s);
+       int mb_len = enclen(encode, s);
 
-       if (s + mb_len > end) {
+       if (! DATA_ENSURE_CHECK(mb_len)) {
           DATA_ENSURE(1);
          s = (UChar* )end;
          p += tlen;
@@ -1925,10 +1741,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       p += tlen;
 
     cc_mb_not_success:
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_CCLASS_MIX_NOT:  STAT_OP_IN(OP_CCLASS_MIX_NOT);
+    case OP_CCLASS_MIX_NOT:  MOP_IN(OP_CCLASS_MIX_NOT);
       DATA_ENSURE(1);
       if (ONIGENC_IS_MBC_HEAD(encode, s)) {
        p += SIZE_BITSET;
@@ -1943,10 +1759,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        p += tlen;
        s++;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_CCLASS_NODE:  STAT_OP_IN(OP_CCLASS_NODE);
+    case OP_CCLASS_NODE:  MOP_IN(OP_CCLASS_NODE);
       {
        OnigCodePoint code;
         void *node;
@@ -1955,49 +1771,49 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 
         DATA_ENSURE(1);
         GET_POINTER_INC(node, p);
-       mb_len = enc_len(encode, s);
+       mb_len = enclen(encode, s);
        ss = s;
        s += mb_len;
        DATA_ENSURE(0);
        code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-       if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
+       if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_ANYCHAR:  STAT_OP_IN(OP_ANYCHAR);
+    case OP_ANYCHAR:  MOP_IN(OP_ANYCHAR);
       DATA_ENSURE(1);
-      n = enc_len(encode, s);
+      n = enclen(encode, s);
       DATA_ENSURE(n);
       if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
       s += n;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_ANYCHAR_ML:  STAT_OP_IN(OP_ANYCHAR_ML);
+    case OP_ANYCHAR_ML:  MOP_IN(OP_ANYCHAR_ML);
       DATA_ENSURE(1);
-      n = enc_len(encode, s);
+      n = enclen(encode, s);
       DATA_ENSURE(n);
       s += n;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_ANYCHAR_STAR:  STAT_OP_IN(OP_ANYCHAR_STAR);
-      while (s < end) {
+    case OP_ANYCHAR_STAR:  MOP_IN(OP_ANYCHAR_STAR);
+      while (DATA_ENSURE_CHECK1) {
        STACK_PUSH_ALT(p, s, sprev);
-       n = enc_len(encode, s);
+       n = enclen(encode, s);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
         sprev = s;
         s += n;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_ANYCHAR_ML_STAR:  STAT_OP_IN(OP_ANYCHAR_ML_STAR);
-      while (s < end) {
+    case OP_ANYCHAR_ML_STAR:  MOP_IN(OP_ANYCHAR_ML_STAR);
+      while (DATA_ENSURE_CHECK1) {
        STACK_PUSH_ALT(p, s, sprev);
-       n = enc_len(encode, s);
+       n = enclen(encode, s);
        if (n > 1) {
          DATA_ENSURE(n);
          sprev = s;
@@ -2008,31 +1824,31 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          s++;
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_ANYCHAR_STAR_PEEK_NEXT:  STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
-      while (s < end) {
+    case OP_ANYCHAR_STAR_PEEK_NEXT:  MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
+      while (DATA_ENSURE_CHECK1) {
        if (*p == *s) {
          STACK_PUSH_ALT(p + 1, s, sprev);
        }
-       n = enc_len(encode, s);
+       n = enclen(encode, s);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
         sprev = s;
         s += n;
       }
       p++;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
-    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
-      while (s < end) {
+    case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+      while (DATA_ENSURE_CHECK1) {
        if (*p == *s) {
          STACK_PUSH_ALT(p + 1, s, sprev);
        }
-       n = enc_len(encode, s);
-       if (n >1) {
+       n = enclen(encode, s);
+       if (n > 1) {
          DATA_ENSURE(n);
          sprev = s;
          s += n;
@@ -2043,36 +1859,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        }
       }
       p++;
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
-    case OP_STATE_CHECK_ANYCHAR_STAR:  STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
+    case OP_STATE_CHECK_ANYCHAR_STAR:  MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
       GET_STATE_CHECK_NUM_INC(mem, p);
-      while (s < end) {
+      while (DATA_ENSURE_CHECK1) {
        STATE_CHECK_VAL(scv, mem);
        if (scv) goto fail;
 
        STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
-       n = enc_len(encode, s);
+       n = enclen(encode, s);
         DATA_ENSURE(n);
         if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
         sprev = s;
         s += n;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 
     case OP_STATE_CHECK_ANYCHAR_ML_STAR:
-      STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
+      MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
 
       GET_STATE_CHECK_NUM_INC(mem, p);
-      while (s < end) {
+      while (DATA_ENSURE_CHECK1) {
        STATE_CHECK_VAL(scv, mem);
        if (scv) goto fail;
 
        STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
-       n = enc_len(encode, s);
+       n = enclen(encode, s);
        if (n > 1) {
          DATA_ENSURE(n);
          sprev = s;
@@ -2083,29 +1899,29 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          s++;
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       break;
 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
 
-    case OP_WORD:  STAT_OP_IN(OP_WORD);
+    case OP_WORD:  MOP_IN(OP_WORD);
       DATA_ENSURE(1);
       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
        goto fail;
 
-      s += enc_len(encode, s);
-      STAT_OP_OUT;
+      s += enclen(encode, s);
+      MOP_OUT;
       break;
 
-    case OP_NOT_WORD:  STAT_OP_IN(OP_NOT_WORD);
+    case OP_NOT_WORD:  MOP_IN(OP_NOT_WORD);
       DATA_ENSURE(1);
       if (ONIGENC_IS_MBC_WORD(encode, s, end))
        goto fail;
 
-      s += enc_len(encode, s);
-      STAT_OP_OUT;
+      s += enclen(encode, s);
+      MOP_OUT;
       break;
 
-    case OP_WORD_BOUND:  STAT_OP_IN(OP_WORD_BOUND);
+    case OP_WORD_BOUND:  MOP_IN(OP_WORD_BOUND);
       if (ON_STR_BEGIN(s)) {
        DATA_ENSURE(1);
        if (! ONIGENC_IS_MBC_WORD(encode, s, end))
@@ -2120,13 +1936,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
            == ONIGENC_IS_MBC_WORD(encode, sprev, end))
          goto fail;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_NOT_WORD_BOUND:  STAT_OP_IN(OP_NOT_WORD_BOUND);
+    case OP_NOT_WORD_BOUND:  MOP_IN(OP_NOT_WORD_BOUND);
       if (ON_STR_BEGIN(s)) {
-       if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
+       if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
          goto fail;
       }
       else if (ON_STR_END(s)) {
@@ -2138,25 +1954,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
            != ONIGENC_IS_MBC_WORD(encode, sprev, end))
          goto fail;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
 #ifdef USE_WORD_BEGIN_END
-    case OP_WORD_BEGIN:  STAT_OP_IN(OP_WORD_BEGIN);
-      if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+    case OP_WORD_BEGIN:  MOP_IN(OP_WORD_BEGIN);
+      if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
        if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
-         STAT_OP_OUT;
+         MOP_OUT;
          continue;
        }
       }
       goto fail;
       break;
 
-    case OP_WORD_END:  STAT_OP_IN(OP_WORD_END);
+    case OP_WORD_END:  MOP_IN(OP_WORD_END);
       if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
        if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
-         STAT_OP_OUT;
+         MOP_OUT;
          continue;
        }
       }
@@ -2164,80 +1980,81 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       break;
 #endif
 
-    case OP_BEGIN_BUF:  STAT_OP_IN(OP_BEGIN_BUF);
+    case OP_BEGIN_BUF:  MOP_IN(OP_BEGIN_BUF);
       if (! ON_STR_BEGIN(s)) goto fail;
 
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_END_BUF:  STAT_OP_IN(OP_END_BUF);
+    case OP_END_BUF:  MOP_IN(OP_END_BUF);
       if (! ON_STR_END(s)) goto fail;
 
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_BEGIN_LINE:  STAT_OP_IN(OP_BEGIN_LINE);
+    case OP_BEGIN_LINE:  MOP_IN(OP_BEGIN_LINE);
       if (ON_STR_BEGIN(s)) {
        if (IS_NOTBOL(msa->options)) goto fail;
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       goto fail;
       break;
 
-    case OP_END_LINE:  STAT_OP_IN(OP_END_LINE);
+    case OP_END_LINE:  MOP_IN(OP_END_LINE);
       if (ON_STR_END(s)) {
 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
        if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
 #endif
          if (IS_NOTEOL(msa->options)) goto fail;
-         STAT_OP_OUT;
+         MOP_OUT;
          continue;
 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
        }
 #endif
       }
       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) {
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
 #ifdef USE_CRNL_AS_LINE_TERMINATOR
       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
 #endif
       goto fail;
       break;
 
-    case OP_SEMI_END_BUF:  STAT_OP_IN(OP_SEMI_END_BUF);
+    case OP_SEMI_END_BUF:  MOP_IN(OP_SEMI_END_BUF);
       if (ON_STR_END(s)) {
 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
        if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
 #endif
-         if (IS_NOTEOL(msa->options)) goto fail;   /* Is it needed? */
-         STAT_OP_OUT;
+         if (IS_NOTEOL(msa->options)) goto fail;
+         MOP_OUT;
          continue;
 #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
        }
 #endif
       }
       else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
-              ON_STR_END(s + enc_len(encode, s))) {
-       STAT_OP_OUT;
+              ON_STR_END(s + enclen(encode, s))) {
+       MOP_OUT;
        continue;
       }
 #ifdef USE_CRNL_AS_LINE_TERMINATOR
       else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
-        UChar* ss = s + enc_len(encode, s);
-        if (ON_STR_END(ss + enc_len(encode, ss))) {
-          STAT_OP_OUT;
+        UChar* ss = s + enclen(encode, s);
+       ss += enclen(encode, ss);
+        if (ON_STR_END(ss)) {
+          MOP_OUT;
           continue;
         }
       }
@@ -2245,79 +2062,79 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       goto fail;
       break;
 
-    case OP_BEGIN_POSITION:  STAT_OP_IN(OP_BEGIN_POSITION);
+    case OP_BEGIN_POSITION:  MOP_IN(OP_BEGIN_POSITION);
       if (s != msa->start)
        goto fail;
 
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_MEMORY_START_PUSH:  STAT_OP_IN(OP_MEMORY_START_PUSH);
+    case OP_MEMORY_START_PUSH:  MOP_IN(OP_MEMORY_START_PUSH);
       GET_MEMNUM_INC(mem, p);
       STACK_PUSH_MEM_START(mem, s);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_MEMORY_START:  STAT_OP_IN(OP_MEMORY_START);
+    case OP_MEMORY_START:  MOP_IN(OP_MEMORY_START);
       GET_MEMNUM_INC(mem, p);
-      mem_start_stk[mem] = (StackIndex )((void* )s);
-      STAT_OP_OUT;
+      mem_start_stk[mem] = (OnigStackIndex )((void* )s);
+      MOP_OUT;
       continue;
       break;
 
-    case OP_MEMORY_END_PUSH:  STAT_OP_IN(OP_MEMORY_END_PUSH);
+    case OP_MEMORY_END_PUSH:  MOP_IN(OP_MEMORY_END_PUSH);
       GET_MEMNUM_INC(mem, p);
       STACK_PUSH_MEM_END(mem, s);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_MEMORY_END:  STAT_OP_IN(OP_MEMORY_END);
+    case OP_MEMORY_END:  MOP_IN(OP_MEMORY_END);
       GET_MEMNUM_INC(mem, p);
-      mem_end_stk[mem] = (StackIndex )((void* )s);
-      STAT_OP_OUT;
+      mem_end_stk[mem] = (OnigStackIndex )((void* )s);
+      MOP_OUT;
       continue;
       break;
 
 #ifdef USE_SUBEXP_CALL
-    case OP_MEMORY_END_PUSH_REC:  STAT_OP_IN(OP_MEMORY_END_PUSH_REC);
+    case OP_MEMORY_END_PUSH_REC:  MOP_IN(OP_MEMORY_END_PUSH_REC);
       GET_MEMNUM_INC(mem, p);
       STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
       STACK_PUSH_MEM_END(mem, s);
       mem_start_stk[mem] = GET_STACK_INDEX(stkp);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_MEMORY_END_REC:  STAT_OP_IN(OP_MEMORY_END_REC);
+    case OP_MEMORY_END_REC:  MOP_IN(OP_MEMORY_END_REC);
       GET_MEMNUM_INC(mem, p);
-      mem_end_stk[mem] = (StackIndex )((void* )s);
+      mem_end_stk[mem] = (OnigStackIndex )((void* )s);
       STACK_GET_MEM_START(mem, stkp);
 
       if (BIT_STATUS_AT(reg->bt_mem_start, mem))
        mem_start_stk[mem] = GET_STACK_INDEX(stkp);
       else
-       mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+       mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
 
       STACK_PUSH_MEM_END_MARK(mem);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 #endif
 
-    case OP_BACKREF1:  STAT_OP_IN(OP_BACKREF1);
+    case OP_BACKREF1:  MOP_IN(OP_BACKREF1);
       mem = 1;
       goto backref;
       break;
 
-    case OP_BACKREF2:  STAT_OP_IN(OP_BACKREF2);
+    case OP_BACKREF2:  MOP_IN(OP_BACKREF2);
       mem = 2;
       goto backref;
       break;
 
-    case OP_BACKREFN:  STAT_OP_IN(OP_BACKREFN);
+    case OP_BACKREFN:  MOP_IN(OP_BACKREFN);
       GET_MEMNUM_INC(mem, p);
     backref:
       {
@@ -2342,15 +2159,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        DATA_ENSURE(n);
        sprev = s;
        STRING_CMP(pstart, s, n);
-       while (sprev + (len = enc_len(encode, sprev)) < s)
+       while (sprev + (len = enclen(encode, sprev)) < s)
          sprev += len;
 
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       break;
 
-    case OP_BACKREFN_IC:  STAT_OP_IN(OP_BACKREFN_IC);
+    case OP_BACKREFN_IC:  MOP_IN(OP_BACKREFN_IC);
       GET_MEMNUM_INC(mem, p);
       {
        int len;
@@ -2373,16 +2190,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        n = pend - pstart;
        DATA_ENSURE(n);
        sprev = s;
-       STRING_CMP_IC(ambig_flag, pstart, &s, n);
-       while (sprev + (len = enc_len(encode, sprev)) < s)
+       STRING_CMP_IC(case_fold_flag, pstart, &s, n);
+       while (sprev + (len = enclen(encode, sprev)) < s)
          sprev += len;
 
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       break;
 
-    case OP_BACKREF_MULTI:  STAT_OP_IN(OP_BACKREF_MULTI);
+    case OP_BACKREF_MULTI:  MOP_IN(OP_BACKREF_MULTI);
       {
        int len, is_fail;
        UChar *pstart, *pend, *swork;
@@ -2409,19 +2226,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          STRING_CMP_VALUE(pstart, swork, n, is_fail);
          if (is_fail) continue;
          s = swork;
-         while (sprev + (len = enc_len(encode, sprev)) < s)
+         while (sprev + (len = enclen(encode, sprev)) < s)
            sprev += len;
 
          p += (SIZE_MEMNUM * (tlen - i - 1));
          break; /* success */
        }
        if (i == tlen) goto fail;
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       break;
 
-    case OP_BACKREF_MULTI_IC:  STAT_OP_IN(OP_BACKREF_MULTI_IC);
+    case OP_BACKREF_MULTI_IC:  MOP_IN(OP_BACKREF_MULTI_IC);
       {
        int len, is_fail;
        UChar *pstart, *pend, *swork;
@@ -2445,23 +2262,23 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          DATA_ENSURE(n);
          sprev = s;
          swork = s;
-         STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
+         STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
          if (is_fail) continue;
          s = swork;
-         while (sprev + (len = enc_len(encode, sprev)) < s)
+         while (sprev + (len = enclen(encode, sprev)) < s)
            sprev += len;
 
          p += (SIZE_MEMNUM * (tlen - i - 1));
          break; /* success */
        }
        if (i == tlen) goto fail;
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       break;
 
-#ifdef USE_BACKREF_AT_LEVEL
-    case OP_BACKREF_AT_LEVEL:
+#ifdef USE_BACKREF_WITH_LEVEL
+    case OP_BACKREF_WITH_LEVEL:
       {
        int len;
        OnigOptionType ic;
@@ -2472,9 +2289,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        GET_LENGTH_INC(tlen,  p);
 
        sprev = s;
-       if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
-                                 , (int )level, (int )tlen, p, &s, end)) {
-         while (sprev + (len = enc_len(encode, sprev)) < s)
+       if (backref_match_at_nested_level(reg, stk, stk_base, ic
+                 , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
+         while (sprev + (len = enclen(encode, sprev)) < s)
            sprev += len;
 
          p += (SIZE_MEMNUM * tlen);
@@ -2482,35 +2299,37 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        else
          goto fail;
 
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       
       break;
 #endif
-    
-    case OP_SET_OPTION_PUSH:  STAT_OP_IN(OP_SET_OPTION_PUSH);
+
+#if 0   /* no need: IS_DYNAMIC_OPTION() == 0 */
+    case OP_SET_OPTION_PUSH:  MOP_IN(OP_SET_OPTION_PUSH);
       GET_OPTION_INC(option, p);
       STACK_PUSH_ALT(p, s, sprev);
       p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_SET_OPTION:  STAT_OP_IN(OP_SET_OPTION);
+    case OP_SET_OPTION:  MOP_IN(OP_SET_OPTION);
       GET_OPTION_INC(option, p);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
+#endif
 
-    case OP_NULL_CHECK_START:  STAT_OP_IN(OP_NULL_CHECK_START);
+    case OP_NULL_CHECK_START:  MOP_IN(OP_NULL_CHECK_START);
       GET_MEMNUM_INC(mem, p);    /* mem: null check id */
       STACK_PUSH_NULL_CHECK_START(mem, s);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_NULL_CHECK_END:  STAT_OP_IN(OP_NULL_CHECK_END);
+    case OP_NULL_CHECK_END:  MOP_IN(OP_NULL_CHECK_END);
       {
        int isnull;
 
@@ -2540,12 +2359,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          }
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
-    case OP_NULL_CHECK_END_MEMST:  STAT_OP_IN(OP_NULL_CHECK_END_MEMST);
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+    case OP_NULL_CHECK_END_MEMST:  MOP_IN(OP_NULL_CHECK_END_MEMST);
       {
        int isnull;
 
@@ -2560,19 +2379,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          goto  null_check_found;
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 #endif
 
 #ifdef USE_SUBEXP_CALL
     case OP_NULL_CHECK_END_MEMST_PUSH:
-      STAT_OP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
+      MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
       {
        int isnull;
 
        GET_MEMNUM_INC(mem, p); /* mem: null check id */
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
        STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
 #else
        STACK_NULL_CHECK_REC(isnull, mem, s);
@@ -2589,39 +2408,39 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          STACK_PUSH_NULL_CHECK_END(mem);
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 #endif
 
-    case OP_JUMP:  STAT_OP_IN(OP_JUMP);
+    case OP_JUMP:  MOP_IN(OP_JUMP);
       GET_RELADDR_INC(addr, p);
       p += addr;
-      STAT_OP_OUT;
+      MOP_OUT;
       CHECK_INTERRUPT_IN_MATCH_AT;
       continue;
       break;
 
-    case OP_PUSH:  STAT_OP_IN(OP_PUSH);
+    case OP_PUSH:  MOP_IN(OP_PUSH);
       GET_RELADDR_INC(addr, p);
       STACK_PUSH_ALT(p + addr, s, sprev);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
-    case OP_STATE_CHECK_PUSH:  STAT_OP_IN(OP_STATE_CHECK_PUSH);
+    case OP_STATE_CHECK_PUSH:  MOP_IN(OP_STATE_CHECK_PUSH);
       GET_STATE_CHECK_NUM_INC(mem, p);
       STATE_CHECK_VAL(scv, mem);
       if (scv) goto fail;
 
       GET_RELADDR_INC(addr, p);
       STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_STATE_CHECK_PUSH_OR_JUMP:  STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
+    case OP_STATE_CHECK_PUSH_OR_JUMP:  MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
       GET_STATE_CHECK_NUM_INC(mem, p);
       GET_RELADDR_INC(addr, p);
       STATE_CHECK_VAL(scv, mem);
@@ -2631,54 +2450,54 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       else {
        STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_STATE_CHECK:  STAT_OP_IN(OP_STATE_CHECK);
+    case OP_STATE_CHECK:  MOP_IN(OP_STATE_CHECK);
       GET_STATE_CHECK_NUM_INC(mem, p);
       STATE_CHECK_VAL(scv, mem);
       if (scv) goto fail;
 
       STACK_PUSH_STATE_CHECK(s, mem);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
 
-    case OP_POP:  STAT_OP_IN(OP_POP);
+    case OP_POP:  MOP_IN(OP_POP);
       STACK_POP_ONE;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_PUSH_OR_JUMP_EXACT1:  STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
+    case OP_PUSH_OR_JUMP_EXACT1:  MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
       GET_RELADDR_INC(addr, p);
-      if (*p == *s && DATA_ENSURE_CHECK(1)) {
+      if (*p == *s && DATA_ENSURE_CHECK1) {
        p++;
        STACK_PUSH_ALT(p + addr, s, sprev);
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       p += (addr + 1);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_PUSH_IF_PEEK_NEXT:  STAT_OP_IN(OP_PUSH_IF_PEEK_NEXT);
+    case OP_PUSH_IF_PEEK_NEXT:  MOP_IN(OP_PUSH_IF_PEEK_NEXT);
       GET_RELADDR_INC(addr, p);
       if (*p == *s) {
        p++;
        STACK_PUSH_ALT(p + addr, s, sprev);
-       STAT_OP_OUT;
+       MOP_OUT;
        continue;
       }
       p++;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_REPEAT:  STAT_OP_IN(OP_REPEAT);
+    case OP_REPEAT:  MOP_IN(OP_REPEAT);
       {
        GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
        GET_RELADDR_INC(addr, p);
@@ -2691,11 +2510,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          STACK_PUSH_ALT(p + addr, s, sprev);
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_REPEAT_NG:  STAT_OP_IN(OP_REPEAT_NG);
+    case OP_REPEAT_NG:  MOP_IN(OP_REPEAT_NG);
       {
        GET_MEMNUM_INC(mem, p);    /* mem: OP_REPEAT ID */
        GET_RELADDR_INC(addr, p);
@@ -2709,11 +2528,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
          p += addr;
        }
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_REPEAT_INC:  STAT_OP_IN(OP_REPEAT_INC);
+    case OP_REPEAT_INC:  MOP_IN(OP_REPEAT_INC);
       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
       si = repeat_stk[mem];
       stkp = STACK_AT(si);
@@ -2731,19 +2550,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
         p = stkp->u.repeat.pcode;
       }
       STACK_PUSH_REPEAT_INC(si);
-      STAT_OP_OUT;
+      MOP_OUT;
       CHECK_INTERRUPT_IN_MATCH_AT;
       continue;
       break;
 
-    case OP_REPEAT_INC_SG:  STAT_OP_IN(OP_REPEAT_INC_SG);
+    case OP_REPEAT_INC_SG:  MOP_IN(OP_REPEAT_INC_SG);
       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
       STACK_GET_REPEAT(mem, stkp);
       si = GET_STACK_INDEX(stkp);
       goto repeat_inc;
       break;
 
-    case OP_REPEAT_INC_NG:  STAT_OP_IN(OP_REPEAT_INC_NG);
+    case OP_REPEAT_INC_NG:  MOP_IN(OP_REPEAT_INC_NG);
       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
       si = repeat_stk[mem];
       stkp = STACK_AT(si);
@@ -2765,68 +2584,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
         STACK_PUSH_REPEAT_INC(si);
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       CHECK_INTERRUPT_IN_MATCH_AT;
       continue;
       break;
 
-    case OP_REPEAT_INC_NG_SG:  STAT_OP_IN(OP_REPEAT_INC_NG_SG);
+    case OP_REPEAT_INC_NG_SG:  MOP_IN(OP_REPEAT_INC_NG_SG);
       GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
       STACK_GET_REPEAT(mem, stkp);
       si = GET_STACK_INDEX(stkp);
       goto repeat_inc_ng;
       break;
 
-    case OP_PUSH_POS:  STAT_OP_IN(OP_PUSH_POS);
+    case OP_PUSH_POS:  MOP_IN(OP_PUSH_POS);
       STACK_PUSH_POS(s, sprev);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_POP_POS:  STAT_OP_IN(OP_POP_POS);
+    case OP_POP_POS:  MOP_IN(OP_POP_POS);
       {
        STACK_POS_END(stkp);
        s     = stkp->u.state.pstr;
        sprev = stkp->u.state.pstr_prev;
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_PUSH_POS_NOT:  STAT_OP_IN(OP_PUSH_POS_NOT);
+    case OP_PUSH_POS_NOT:  MOP_IN(OP_PUSH_POS_NOT);
       GET_RELADDR_INC(addr, p);
       STACK_PUSH_POS_NOT(p + addr, s, sprev);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_FAIL_POS:  STAT_OP_IN(OP_FAIL_POS);
+    case OP_FAIL_POS:  MOP_IN(OP_FAIL_POS);
       STACK_POP_TIL_POS_NOT;
       goto fail;
       break;
 
-    case OP_PUSH_STOP_BT:  STAT_OP_IN(OP_PUSH_STOP_BT);
+    case OP_PUSH_STOP_BT:  MOP_IN(OP_PUSH_STOP_BT);
       STACK_PUSH_STOP_BT;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_POP_STOP_BT:  STAT_OP_IN(OP_POP_STOP_BT);
+    case OP_POP_STOP_BT:  MOP_IN(OP_POP_STOP_BT);
       STACK_STOP_BT_END;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_LOOK_BEHIND:  STAT_OP_IN(OP_LOOK_BEHIND);
+    case OP_LOOK_BEHIND:  MOP_IN(OP_LOOK_BEHIND);
       GET_LENGTH_INC(tlen, p);
       s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
       if (IS_NULL(s)) goto fail;
       sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_PUSH_LOOK_BEHIND_NOT:  STAT_OP_IN(OP_PUSH_LOOK_BEHIND_NOT);
+    case OP_PUSH_LOOK_BEHIND_NOT:  MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
       GET_RELADDR_INC(addr, p);
       GET_LENGTH_INC(tlen, p);
       q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
@@ -2841,28 +2660,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        s = q;
        sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
       }
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_FAIL_LOOK_BEHIND_NOT:  STAT_OP_IN(OP_FAIL_LOOK_BEHIND_NOT);
+    case OP_FAIL_LOOK_BEHIND_NOT:  MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
       STACK_POP_TIL_LOOK_BEHIND_NOT;
       goto fail;
       break;
 
 #ifdef USE_SUBEXP_CALL
-    case OP_CALL:  STAT_OP_IN(OP_CALL);
+    case OP_CALL:  MOP_IN(OP_CALL);
       GET_ABSADDR_INC(addr, p);
       STACK_PUSH_CALL_FRAME(p);
       p = reg->p + addr;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
-    case OP_RETURN:  STAT_OP_IN(OP_RETURN);
+    case OP_RETURN:  MOP_IN(OP_RETURN);
       STACK_RETURN(p);
       STACK_PUSH_RETURN;
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 #endif
@@ -2872,9 +2691,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       break;
 
     fail:
-      STAT_OP_OUT;
+      MOP_OUT;
       /* fall */
-    case OP_FAIL:  STAT_OP_IN(OP_FAIL);
+    case OP_FAIL:  MOP_IN(OP_FAIL);
       STACK_POP;
       p     = stk->u.state.pcode;
       s     = stk->u.state.pstr;
@@ -2887,7 +2706,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       }
 #endif
 
-      STAT_OP_OUT;
+      MOP_OUT;
       continue;
       break;
 
@@ -2943,32 +2762,25 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
       if (t == target_end)
        return s;
     }
-    s += enc_len(enc, s);
+    s += enclen(enc, s);
   }
 
   return (UChar* )NULL;
 }
 
 static int
-str_lower_case_match(OnigEncoding enc, int ambig_flag,
+str_lower_case_match(OnigEncoding enc, int case_fold_flag,
                      const UChar* t, const UChar* tend,
                     const UChar* p, const UChar* end)
 {
   int lowlen;
-  UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
-  const UChar* tsave;
-  const UChar* psave;
-
-  tsave = t;
-  psave = p;
+  UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
 
   while (t < tend) {
-    lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
+    lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
     q = lowbuf;
     while (lowlen > 0) {
-      if (*t++ != *q++) {
-       return 0;
-      }
+      if (*t++ != *q++)        return 0;
       lowlen--;
     }
   }
@@ -2977,7 +2789,7 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
 }
 
 static UChar*
-slow_search_ic(OnigEncoding enc, int ambig_flag,
+slow_search_ic(OnigEncoding enc, int case_fold_flag,
               UChar* target, UChar* target_end,
               const UChar* text, const UChar* text_end, UChar* text_range)
 {
@@ -2991,10 +2803,11 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
   s = (UChar* )text;
 
   while (s < end) {
-    if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
+    if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+                            s, text_end))
       return s;
 
-    s += enc_len(enc, s);
+    s += enclen(enc, s);
   }
 
   return (UChar* )NULL;
@@ -3033,7 +2846,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
 }
 
 static UChar*
-slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
+slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
                        UChar* target, UChar* target_end,
                        const UChar* text, const UChar* adjust_text,
                        const UChar* text_end, const UChar* text_start)
@@ -3048,7 +2861,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
     s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
 
   while (s >= text) {
-    if (str_lower_case_match(enc, ambig_flag,
+    if (str_lower_case_match(enc, case_fold_flag,
                              target, target_end, s, text_end))
       return s;
 
@@ -3084,15 +2897,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
     while (s < end) {
       p = se = s + tlen1;
       t = tail;
-      while (t >= target && *p == *t) {
-        p--; t--;
+      while (*p == *t) {
+       if (t == target) return (UChar* )s;
+       p--; t--;
       }
-      if (t < target) return (UChar* )s;
-
       skip = reg->map[*se];
       t = s;
       do {
-        s += enc_len(reg->enc, s);
+        s += enclen(reg->enc, s);
       } while ((s - t) < skip && s < end);
     }
   }
@@ -3100,15 +2912,14 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
     while (s < end) {
       p = se = s + tlen1;
       t = tail;
-      while (t >= target && *p == *t) {
-        p--; t--;
+      while (*p == *t) {
+       if (t == target) return (UChar* )s;
+       p--; t--;
       }
-      if (t < target) return (UChar* )s;
-
       skip = reg->int_map[*se];
       t = s;
       do {
-        s += enc_len(reg->enc, s);
+        s += enclen(reg->enc, s);
       } while ((s - t) < skip && s < end);
     }
   }
@@ -3133,10 +2944,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
     while (s < end) {
       p = s;
       t = tail;
-      while (t >= target && *p == *t) {
+      while (*p == *t) {
+       if (t == target) return (UChar* )p;
        p--; t--;
       }
-      if (t < target) return (UChar* )(p + 1);
       s += reg->map[*s];
     }
   }
@@ -3144,10 +2955,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
     while (s < end) {
       p = s;
       t = tail;
-      while (t >= target && *p == *t) {
+      while (*p == *t) {
+       if (t == target) return (UChar* )p;
        p--; t--;
       }
-      if (t < target) return (UChar* )(p + 1);
       s += reg->int_map[*s];
     }
   }
@@ -3155,7 +2966,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
 }
 
 static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
+                    int** skip)
                     
 {
   int i, len;
@@ -3213,7 +3025,7 @@ map_search(OnigEncoding enc, UChar map[],
   while (s < text_range) {
     if (map[*s]) return (UChar* )s;
 
-    s += enc_len(enc, s);
+    s += enclen(enc, s);
   }
   return (UChar* )NULL;
 }
@@ -3239,7 +3051,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
 {
   int r;
   UChar *prev;
-  MatchArg msa;
+  OnigMatchArg msa;
 
 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
  start:
@@ -3275,7 +3087,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
 #endif
 
   if (region
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
       && !IS_POSIX_REGION(option)
 #endif
       ) {
@@ -3286,7 +3098,11 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
 
   if (r == 0) {
     prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
-    r = match_at(reg, str, end, at, prev, &msa);
+    r = match_at(reg, str, end,
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+                end,
+#endif
+                at, prev, &msa);
   }
 
   MATCH_ARG_FREE(msa);
@@ -3312,7 +3128,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
     }
     else {
       UChar *q = p + reg->dmin;
-      while (p < q) p += enc_len(reg->enc, p);
+      while (p < q) p += enclen(reg->enc, p);
     }
   }
 
@@ -3322,7 +3138,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
     p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
     break;
   case ONIG_OPTIMIZE_EXACT_IC:
-    p = slow_search_ic(reg->enc, reg->ambig_flag,
+    p = slow_search_ic(reg->enc, reg->case_fold_flag,
                        reg->exact, reg->exact_end, p, end, range);
     break;
 
@@ -3343,7 +3159,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
     if (p - reg->dmin < s) {
     retry_gate:
       pprev = p;
-      p += enc_len(reg->enc, p);
+      p += enclen(reg->enc, p);
       goto retry;
     }
 
@@ -3362,10 +3178,12 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
 
       case ANCHOR_END_LINE:
        if (ON_STR_END(p)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
                                            (pprev ? pprev : str), p);
          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
+#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
 #ifdef USE_CRNL_AS_LINE_TERMINATOR
@@ -3443,7 +3261,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
     break;
 
   case ONIG_OPTIMIZE_EXACT_IC:
-    p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+    p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
                                 reg->exact, reg->exact_end,
                                 range, adjrange, end, p);
     break;
@@ -3484,12 +3302,14 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
 
       case ANCHOR_END_LINE:
        if (ON_STR_END(p)) {
+#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
          prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(prev)) goto fail;
          if (ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
          }
+#endif
        }
        else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
 #ifdef USE_CRNL_AS_LINE_TERMINATOR
@@ -3532,8 +3352,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 {
   int r;
   UChar *s, *prev;
-  MatchArg msa;
+  OnigMatchArg msa;
   const UChar *orig_start = start;
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+  const UChar *orig_range = range;
+#endif
 
 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
  start:
@@ -3567,7 +3390,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 #endif
 
   if (region
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
       && !IS_POSIX_REGION(option)
 #endif
       ) {
@@ -3577,8 +3400,32 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 
   if (start > end || start < str) goto mismatch_no_msa;
 
+
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+  if (r != ONIG_MISMATCH) {\
+    if (r >= 0) {\
+      if (! IS_FIND_LONGEST(reg->options)) {\
+        goto match;\
+      }\
+    }\
+    else goto finish; /* error */ \
+  }
+#else
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+  if (r != ONIG_MISMATCH) {\
+    if (r >= 0) {\
+      goto match;\
+    }\
+    else goto finish; /* error */ \
+  }
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#else
 #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK \
+#define MATCH_AND_RETURN_CHECK(none) \
   r = match_at(reg, str, end, s, prev, &msa);\
   if (r != ONIG_MISMATCH) {\
     if (r >= 0) {\
@@ -3589,7 +3436,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
     else goto finish; /* error */ \
   }
 #else
-#define MATCH_AND_RETURN_CHECK \
+#define MATCH_AND_RETURN_CHECK(none) \
   r = match_at(reg, str, end, s, prev, &msa);\
   if (r != ONIG_MISMATCH) {\
     if (r >= 0) {\
@@ -3597,7 +3444,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
     }\
     else goto finish; /* error */ \
   }
-#endif
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+
 
   /* anchor optimize: resume search range */
   if (reg->anchor != 0 && str < end) {
@@ -3700,10 +3549,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 
       MATCH_ARG_INIT(msa, option, region, start);
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
-      msa.state_check_buff      = (void* )0;
-      msa.state_check_buff_size = 0;
+      msa.state_check_buff = (void* )0;
+      msa.state_check_buff_size = 0;   /* NO NEED, for valgrind */
 #endif
-      MATCH_AND_RETURN_CHECK;
+      MATCH_AND_RETURN_CHECK(end);
       goto mismatch;
     }
     goto mismatch_no_msa;
@@ -3754,9 +3603,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
            prev = low_prev;
          }
          while (s <= high) {
-           MATCH_AND_RETURN_CHECK;
+           MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
-           s += enc_len(reg->enc, s);
+           s += enclen(reg->enc, s);
          }
        } while (s < range);
        goto mismatch;
@@ -3767,13 +3616,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 
         if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
           do {
-            MATCH_AND_RETURN_CHECK;
+            MATCH_AND_RETURN_CHECK(orig_range);
             prev = s;
-            s += enc_len(reg->enc, s);
+            s += enclen(reg->enc, s);
 
             while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
               prev = s;
-              s += enc_len(reg->enc, s);
+              s += enclen(reg->enc, s);
             }
           } while (s < range);
           goto mismatch;
@@ -3782,16 +3631,21 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
     }
 
     do {
-      MATCH_AND_RETURN_CHECK;
+      MATCH_AND_RETURN_CHECK(orig_range);
       prev = s;
-      s += enc_len(reg->enc, s);
+      s += enclen(reg->enc, s);
     } while (s < range);
 
     if (s == range) { /* because empty match with /$/. */
-      MATCH_AND_RETURN_CHECK;
+      MATCH_AND_RETURN_CHECK(orig_range);
     }
   }
   else {  /* backward search */
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+    if (orig_start < end)
+      orig_start += enclen(reg->enc, orig_start); /* is upper range */
+#endif
+
     if (reg->optimize != ONIG_OPTIMIZE_NONE) {
       UChar *low, *high, *adjrange, *sch_start;
 
@@ -3814,7 +3668,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 
          while (s >= low) {
            prev = onigenc_get_prev_char_head(reg->enc, str, s);
-           MATCH_AND_RETURN_CHECK;
+           MATCH_AND_RETURN_CHECK(orig_start);
            s = prev;
          }
        } while (s >= range);
@@ -3842,7 +3696,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 
     do {
       prev = onigenc_get_prev_char_head(reg->enc, str, s);
-      MATCH_AND_RETURN_CHECK;
+      MATCH_AND_RETURN_CHECK(orig_start);
       s = prev;
     } while (s >= range);
   }
@@ -3865,7 +3719,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
   /* If result is mismatch and no FIND_NOT_EMPTY option,
      then the region is not setted in match_at(). */
   if (IS_FIND_NOT_EMPTY(reg->options) && region
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
       && !IS_POSIX_REGION(option)
 #endif
       ) {
@@ -3906,10 +3760,10 @@ onig_get_options(regex_t* reg)
   return reg->options;
 }
 
-extern  OnigAmbigType
-onig_get_ambig_flag(regex_t* reg)
+extern  OnigCaseFoldType
+onig_get_case_fold_flag(regex_t* reg)
 {
-  return reg->ambig_flag;
+  return reg->case_fold_flag;
 }
 
 extern OnigSyntaxType*
index f5ad1f35a29e641a07a5c0c5729039f2f8ef364e..b1b957b40c1b374fca1406737d22d0305f8c9b72 100755 (executable)
@@ -2,7 +2,7 @@
   regext.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -100,7 +100,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
   if (to == ONIG_ENCODING_UTF16_BE) {
     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
       *conv = (UChar* )xmalloc(len * 2);
-      CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*conv);
       *conv_end = *conv + (len * 2);
       conv_ext0be(s, end, *conv);
       return 0;
@@ -108,7 +108,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
     else if (from == ONIG_ENCODING_UTF16_LE) {
     swap16:
       *conv = (UChar* )xmalloc(len);
-      CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*conv);
       *conv_end = *conv + len;
       conv_swap2bytes(s, end, *conv);
       return 0;
@@ -117,7 +117,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
   else if (to == ONIG_ENCODING_UTF16_LE) {
     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
       *conv = (UChar* )xmalloc(len * 2);
-      CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*conv);
       *conv_end = *conv + (len * 2);
       conv_ext0le(s, end, *conv);
       return 0;
@@ -129,7 +129,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
   if (to == ONIG_ENCODING_UTF32_BE) {
     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
       *conv = (UChar* )xmalloc(len * 4);
-      CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*conv);
       *conv_end = *conv + (len * 4);
       conv_ext0be32(s, end, *conv);
       return 0;
@@ -137,7 +137,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
     else if (from == ONIG_ENCODING_UTF32_LE) {
     swap32:
       *conv = (UChar* )xmalloc(len);
-      CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*conv);
       *conv_end = *conv + len;
       conv_swap4bytes(s, end, *conv);
       return 0;
@@ -146,7 +146,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
   else if (to == ONIG_ENCODING_UTF32_LE) {
     if (from == ONIG_ENCODING_ASCII || from == ONIG_ENCODING_ISO_8859_1) {
       *conv = (UChar* )xmalloc(len * 4);
-      CHECK_NULL_RETURN_VAL(*conv, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*conv);
       *conv_end = *conv + (len * 4);
       conv_ext0le32(s, end, *conv);
       return 0;
@@ -178,17 +178,24 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
     cpat_end = (UChar* )pattern_end;
   }
 
-  r = onig_alloc_init(reg, ci->option, ci->ambig_flag, ci->target_enc,
-                      ci->syntax);
+  *reg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(*reg)) {
+    r = ONIGERR_MEMORY;
+    goto err2;
+  }
+
+  r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
+                   ci->syntax);
   if (r) goto err;
 
   r = onig_compile(*reg, cpat, cpat_end, einfo);
   if (r) {
+  err:
     onig_free(*reg);
     *reg = NULL;
   }
 
- err:
+ err2:
   if (cpat != pattern) xfree(cpat);
 
   return r;
index 248957c9d9a2ecb5000158162e12fb353b2bad33..4bd18c45e1a8bc9e417586591d8a50b63aef9758 100644 (file)
@@ -2,7 +2,7 @@
   reggnu.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -85,7 +85,7 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
   OnigErrorInfo einfo;
 
   r = onig_compile(reg, (UChar* )pattern, (UChar* )(pattern + size), &einfo);
-  if (r != 0) {
+  if (r != ONIG_NORMAL) {
     if (IS_NOT_NULL(ebuf))
       (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
   }
@@ -108,7 +108,7 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
 
   r = onig_recompile(reg, (UChar* )pattern, (UChar* )(pattern + size),
                     reg->options, enc, OnigDefaultSyntax, &einfo);
-  if (r != 0) {
+  if (r != ONIG_NORMAL) {
     if (IS_NOT_NULL(ebuf))
       (void )onig_error_code_to_str((UChar* )ebuf, r, &einfo);
   }
@@ -125,10 +125,13 @@ re_free_pattern(regex_t* reg)
 extern int
 re_alloc_pattern(regex_t** reg)
 {
-  return onig_alloc_init(reg, ONIG_OPTION_DEFAULT,
-                         ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
-                         OnigEncDefaultCharEncoding,
-                        OnigDefaultSyntax);
+  *reg = (regex_t* )xmalloc(sizeof(regex_t));
+  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+
+  return onig_reg_init(*reg, ONIG_OPTION_DEFAULT,
+                      ONIGENC_CASE_FOLD_DEFAULT,
+                      OnigEncDefaultCharEncoding,
+                      OnigDefaultSyntax);
 }
 
 extern void
@@ -138,18 +141,8 @@ re_set_casetable(const char* table)
 }
 
 extern void
-#ifdef ONIG_RUBY_M17N
-re_mbcinit(OnigEncoding enc)
-#else
 re_mbcinit(int mb_code)
-#endif
 {
-#ifdef ONIG_RUBY_M17N
-
-  onigenc_set_default_encoding(enc);
-
-#else
-
   OnigEncoding enc;
 
   switch (mb_code) {
@@ -171,5 +164,4 @@ re_mbcinit(int mb_code)
   }
 
   onigenc_set_default_encoding(enc);
-#endif
 }
index d6819d8f949d4d01dd81f7ff512ec853b7779ac4..a0ce4912d8cf54e6c9f7f253712769bafb4d2f89 100644 (file)
@@ -4,7 +4,7 @@
   regint.h -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 /* config */
 /* spec. config */
-/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
 #define USE_NAMED_GROUP
 #define USE_SUBEXP_CALL
-#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
+#define USE_BACKREF_WITH_LEVEL        /* \k<name+n>, \k<name-n> */
+#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT  /* /(?:()|())*\2/ */
 #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */
 #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
 /* #define USE_RECOMPILE_API */
-/* treat \r\n as line terminator.
-   !!! NO SUPPORT !!!
-   use this configuration on your own responsibility */
-/* #define USE_CRNL_AS_LINE_TERMINATOR */
+/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
 
 /* internal config */
-#define USE_RECYCLE_NODE
+#define USE_PARSE_TREE_NODE_RECYCLE
 #define USE_OP_PUSH_OR_JUMP_EXACT
-#define USE_QUANTIFIER_PEEK_NEXT
-#define USE_ST_HASH_TABLE
+#define USE_QTFR_PEEK_NEXT
+#define USE_ST_LIBRARY
 #define USE_SHARED_CCLASS_TABLE
 
 #define INIT_MATCH_STACK_SIZE                     160
 #define DEFAULT_MATCH_STACK_LIMIT_SIZE              0 /* unlimited */
 
-/* interface to external system */
-#ifdef NOT_RUBY      /* given from Makefile */
+#if defined(__GNUC__)
+#  define ARG_UNUSED  __attribute__ ((unused))
+#else
+#  define ARG_UNUSED
+#endif
+
+/* */
+/* escape other system UChar definition */
 #include "config.h"
-#define USE_BACKREF_AT_LEVEL
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+
+#define USE_WORD_BEGIN_END        /* "\<", "\>" */
 #define USE_CAPTURE_HISTORY
 #define USE_VARIABLE_META_CHARS
-#define USE_WORD_BEGIN_END          /* "\<": word-begin, "\>": word-end */
-#define USE_POSIX_REGION_OPTION     /* needed for POSIX API support */
+#define USE_POSIX_API_REGION_OPTION
 #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
 /* #define USE_COMBINATION_EXPLOSION_CHECK */     /* (X*)* */
+
 /* #define USE_MULTI_THREAD_SYSTEM */
-#define THREAD_SYSTEM_INIT          /* depend on thread system */
-#define THREAD_SYSTEM_END           /* depend on thread system */
-#define THREAD_ATOMIC_START         /* depend on thread system */
-#define THREAD_ATOMIC_END           /* depend on thread system */
-#define THREAD_PASS                 /* depend on thread system */
+#define THREAD_SYSTEM_INIT      /* depend on thread system */
+#define THREAD_SYSTEM_END       /* depend on thread system */
+#define THREAD_ATOMIC_START     /* depend on thread system */
+#define THREAD_ATOMIC_END       /* depend on thread system */
+#define THREAD_PASS             /* depend on thread system */
 #define xmalloc     malloc
 #define xrealloc    realloc
 #define xcalloc     calloc
 #define xfree       free
-#else
-#include "ruby.h"
-#include "rubysig.h"      /* for DEFER_INTS, ENABLE_INTS */
-
-#define USE_COMBINATION_EXPLOSION_CHECK        /* (X*)* */
-#define USE_MULTI_THREAD_SYSTEM
-#define THREAD_SYSTEM_INIT
-#define THREAD_SYSTEM_END
-#define THREAD_ATOMIC_START          DEFER_INTS
-#define THREAD_ATOMIC_END            ENABLE_INTS
-#define THREAD_PASS                  rb_thread_schedule()
 
-#define DEFAULT_WARN_FUNCTION        onig_rb_warn
-#define DEFAULT_VERB_WARN_FUNCTION   onig_rb_warning
+#define CHECK_INTERRUPT_IN_MATCH_AT
 
-#endif /* else NOT_RUBY */
+#define st_init_table                  onig_st_init_table
+#define st_init_table_with_size        onig_st_init_table_with_size
+#define st_init_numtable               onig_st_init_numtable
+#define st_init_numtable_with_size     onig_st_init_numtable_with_size
+#define st_init_strtable               onig_st_init_strtable
+#define st_init_strtable_with_size     onig_st_init_strtable_with_size
+#define st_delete                      onig_st_delete
+#define st_delete_safe                 onig_st_delete_safe
+#define st_insert                      onig_st_insert
+#define st_lookup                      onig_st_lookup
+#define st_foreach                     onig_st_foreach
+#define st_add_direct                  onig_st_add_direct
+#define st_free_table                  onig_st_free_table
+#define st_cleanup_safe                onig_st_cleanup_safe
+#define st_copy                        onig_st_copy
+#define st_nothing_key_clone           onig_st_nothing_key_clone
+#define st_nothing_key_free            onig_st_nothing_key_free
+/* */
+#define onig_st_is_member              st_is_member
 
 #define STATE_CHECK_STRING_THRESHOLD_LEN             7
 #define STATE_CHECK_BUFF_MAX_SIZE               0x4000
 #define xmemset     memset
 #define xmemcpy     memcpy
 #define xmemmove    memmove
+
 #if defined(_WIN32) && !defined(__GNUC__)
 #define xalloca     _alloca
-#if _MSC_VER < 1500
-#ifndef vsnprintf
-#define vsnprintf   _vsnprintf
-#endif
-#endif
+#define xvsnprintf  _vsnprintf
 #else
 #define xalloca     alloca
+#define xvsnprintf  vsnprintf
 #endif
 
+
 #if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
 #define ONIG_STATE_INC(reg) (reg)->state++
 #define ONIG_STATE_DEC(reg) (reg)->state--
 #define ONIG_STATE_DEC_THREAD(reg)  /* Nothing */
 #endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
 
-
-#define onig_st_is_member              st_is_member
-
-#ifdef NOT_RUBY
-
-#define st_init_table                  onig_st_init_table
-#define st_init_table_with_size        onig_st_init_table_with_size
-#define st_init_numtable               onig_st_init_numtable
-#define st_init_numtable_with_size     onig_st_init_numtable_with_size
-#define st_init_strtable               onig_st_init_strtable
-#define st_init_strtable_with_size     onig_st_init_strtable_with_size
-#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
-#define st_delete                      onig_st_delete
-#define st_delete_safe                 onig_st_delete_safe
-#define st_insert                      onig_st_insert
-#define st_insert_strend               onig_st_insert_strend
-#define st_lookup                      onig_st_lookup
-#define st_lookup_strend               onig_st_lookup_strend
-#define st_foreach                     onig_st_foreach
-#define st_add_direct                  onig_st_add_direct
-#define st_add_direct_strend           onig_st_add_direct_strend
-#define st_free_table                  onig_st_free_table
-#define st_cleanup_safe                onig_st_cleanup_safe
-#define st_copy                        onig_st_copy
-#define st_nothing_key_clone           onig_st_nothing_key_clone
-#define st_nothing_key_free            onig_st_nothing_key_free
-
-#else /* NOT_RUBY */
-
-#define onig_st_init_table                  st_init_table
-#define onig_st_init_table_with_size        st_init_table_with_size
-#define onig_st_init_numtable               st_init_numtable
-#define onig_st_init_numtable_with_size     st_init_numtable_with_size
-#define onig_st_init_strtable               st_init_strtable
-#define onig_st_init_strtable_with_size     st_init_strtable_with_size
-#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
-#define onig_st_delete                      st_delete
-#define onig_st_delete_safe                 st_delete_safe
-#define onig_st_insert                      st_insert
-#define onig_st_insert_strend               st_insert_strend
-#define onig_st_lookup                      st_lookup
-#define onig_st_lookup_strend               st_lookup_strend
-#define onig_st_foreach                     st_foreach
-#define onig_st_add_direct                  st_add_direct
-#define onig_st_add_direct_strend           st_add_direct_strend
-#define onig_st_free_table                  st_free_table
-#define onig_st_cleanup_safe                st_cleanup_safe
-#define onig_st_copy                        st_copy
-#define onig_st_nothing_key_clone           st_nothing_key_clone
-#define onig_st_nothing_key_free            st_nothing_key_free
-
-#endif /* NOT_RUBY */
-
-
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 #endif
 
 #include "regenc.h"
-#include "oniguruma.h"
 
 #ifdef MIN
 #undef MIN
 #define IS_NULL(p)                    (((void*)(p)) == (void*)0)
 #define IS_NOT_NULL(p)                (((void*)(p)) != (void*)0)
 #define CHECK_NULL_RETURN(p)          if (IS_NULL(p)) return NULL
-#define CHECK_NULL_RETURN_VAL(p,val)  if (IS_NULL(p)) return (val)
+#define CHECK_NULL_RETURN_MEMERR(p)   if (IS_NULL(p)) return ONIGERR_MEMORY
 #define NULL_UCHARP                   ((UChar* )0)
 
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
+
+#define PLATFORM_GET_INC(val,p,type) do{\
+  val  = *(type* )p;\
+  (p) += sizeof(type);\
+} while(0)
+
+#else
+
+#define PLATFORM_GET_INC(val,p,type) do{\
+  xmemcpy(&val, (p), sizeof(type));\
+  (p) += sizeof(type);\
+} while(0)
+
 /* sizeof(OnigCodePoint) */
 #define WORD_ALIGNMENT_SIZE     SIZEOF_LONG
 
   (addr) -= ((unsigned int )(addr) % WORD_ALIGNMENT_SIZE);\
 } while (0)
 
-
-#define B_SHIFT  8
-#define B_MASK   0xff
-
-#define SERIALIZE_2BYTE_INT(i,p) do {\
-  *(p)     = ((i) >> B_SHIFT) & B_MASK;\
-  *((p)+1) = (i) & B_MASK;\
-} while (0)
-
-#define SERIALIZE_4BYTE_INT(i,p) do {\
-  *(p)     = ((i) >> B_SHIFT*3) & B_MASK;\
-  *((p)+1) = ((i) >> B_SHIFT*2) & B_MASK;\
-  *((p)+2) = ((i) >> B_SHIFT  ) & B_MASK;\
-  *((p)+3) = (i) & B_MASK;\
-} while (0)
-
-#define SERIALIZE_8BYTE_INT(i,p) do {\
-  *(p)     = ((i) >> B_SHIFT*7) & B_MASK;\
-  *((p)+1) = ((i) >> B_SHIFT*6) & B_MASK;\
-  *((p)+2) = ((i) >> B_SHIFT*5) & B_MASK;\
-  *((p)+3) = ((i) >> B_SHIFT*4) & B_MASK;\
-  *((p)+4) = ((i) >> B_SHIFT*3) & B_MASK;\
-  *((p)+5) = ((i) >> B_SHIFT*2) & B_MASK;\
-  *((p)+6) = ((i) >> B_SHIFT  ) & B_MASK;\
-  *((p)+7) = (i) & B_MASK;\
-} while (0)
-
-#define GET_2BYTE_INT_INC(type,i,p) do {\
-  (i) = (type )(((unsigned int )(*(p)) << B_SHIFT) | (unsigned int )((p)[1]));\
-  (p) += 2;\
-} while (0)
-
-#define GET_4BYTE_INT_INC(type,i,p) do {\
-  (i) = (type )(((unsigned int )((p)[0]) << B_SHIFT*3) | \
-               ((unsigned int )((p)[1]) << B_SHIFT*2) | \
-               ((unsigned int )((p)[2]) << B_SHIFT  ) | \
-               ((unsigned int )((p)[3])             )); \
-  (p) += 4;\
-} while (0)
-
-#define GET_8BYTE_INT_INC(type,i,p) do {\
-  (i) = (type )(((unsigned long )((p)[0]) << B_SHIFT*7) | \
-               ((unsigned long )((p)[1]) << B_SHIFT*6) | \
-               ((unsigned long )((p)[2]) << B_SHIFT*5) | \
-               ((unsigned long )((p)[3]) << B_SHIFT*4) | \
-               ((unsigned long )((p)[4]) << B_SHIFT*3) | \
-               ((unsigned long )((p)[5]) << B_SHIFT*2) | \
-               ((unsigned long )((p)[6]) << B_SHIFT  ) | \
-               ((unsigned long )((p)[7])             )); \
-  (p) += 8;\
-} while (0)
-
-#if SIZEOF_SHORT == 2
-#define GET_SHORT_INC(i,p)     GET_2BYTE_INT_INC(short,i,p)
-#define SERIALIZE_SHORT(i,p)   SERIALIZE_2BYTE_INT(i,p)
-#elif SIZEOF_SHORT == 4
-#define GET_SHORT_INC(i,p)     GET_4BYTE_INT_INC(short,i,p)
-#define SERIALIZE_SHORT(i,p)   SERIALIZE_4BYTE_INT(i,p)
-#elif SIZEOF_SHORT == 8
-#define GET_SHORT_INC(i,p)     GET_8BYTE_INT_INC(short,i,p)
-#define SERIALIZE_SHORT(i,p)   SERIALIZE_8BYTE_INT(i,p)
-#endif
-
-#if SIZEOF_INT == 2
-#define GET_INT_INC(i,p)       GET_2BYTE_INT_INC(int,i,p)
-#define GET_UINT_INC(i,p)      GET_2BYTE_INT_INC(unsigned,i,p)
-#define SERIALIZE_INT(i,p)     SERIALIZE_2BYTE_INT(i,p)
-#define SERIALIZE_UINT(i,p)    SERIALIZE_2BYTE_INT(i,p)
-#elif SIZEOF_INT == 4
-#define GET_INT_INC(i,p)       GET_4BYTE_INT_INC(int,i,p)
-#define GET_UINT_INC(i,p)      GET_4BYTE_INT_INC(unsigned,i,p)
-#define SERIALIZE_INT(i,p)     SERIALIZE_4BYTE_INT(i,p)
-#define SERIALIZE_UINT(i,p)    SERIALIZE_4BYTE_INT(i,p)
-#elif SIZEOF_INT == 8
-#define GET_INT_INC(i,p)       GET_8BYTE_INT_INC(int,i,p)
-#define GET_UINT_INC(i,p)      GET_8BYTE_INT_INC(unsigned,i,p)
-#define SERIALIZE_INT(i,p)     SERIALIZE_8BYTE_INT(i,p)
-#define SERIALIZE_UINT(i,p)    SERIALIZE_8BYTE_INT(i,p)
-#endif
-
 #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
 
 /* stack pop level */
@@ -372,17 +262,17 @@ typedef unsigned int  BitStatusType;
 #define BIT_STATUS_CLEAR(stats)      (stats) = 0
 #define BIT_STATUS_ON_ALL(stats)     (stats) = ~((BitStatusType )0)
 #define BIT_STATUS_AT(stats,n) \
-  ((n) < BIT_STATUS_BITS_NUM  ?  ((stats) & (1 << n)) : ((stats) & 1))
+  ((n) < (int )BIT_STATUS_BITS_NUM  ?  ((stats) & (1 << n)) : ((stats) & 1))
 
 #define BIT_STATUS_ON_AT(stats,n) do {\
-  if ((n) < BIT_STATUS_BITS_NUM)\
+    if ((n) < (int )BIT_STATUS_BITS_NUM)       \
     (stats) |= (1 << (n));\
   else\
     (stats) |= 1;\
 } while (0)
 
 #define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
-  if ((n) < BIT_STATUS_BITS_NUM)\
+    if ((n) < (int )BIT_STATUS_BITS_NUM)\
     (stats) |= (1 << (n));\
 } while (0)
 
@@ -401,7 +291,6 @@ typedef unsigned int  BitStatusType;
 #define IS_EXTEND(option)         ((option) & ONIG_OPTION_EXTEND)
 #define IS_FIND_LONGEST(option)   ((option) & ONIG_OPTION_FIND_LONGEST)
 #define IS_FIND_NOT_EMPTY(option) ((option) & ONIG_OPTION_FIND_NOT_EMPTY)
-#define IS_POSIXLINE(option)      (IS_SINGLELINE(option) && IS_MULTILINE(option))
 #define IS_FIND_CONDITION(option) ((option) & \
           (ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY))
 #define IS_NOTBOL(option)         ((option) & ONIG_OPTION_NOTBOL)
@@ -415,6 +304,9 @@ typedef unsigned int  BitStatusType;
 /* ignore-case and multibyte status are included in compiled code. */
 #define IS_DYNAMIC_OPTION(option)  0
 
+#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
+  ((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
+
 #define REPEAT_INFINITE         -1
 #define IS_REPEAT_INFINITE(n)   ((n) == REPEAT_INFINITE)
 
@@ -436,7 +328,7 @@ typedef Bits*          BitSetRef;
 
 #define BITSET_CLEAR(bs) do {\
   int i;\
-  for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\
+  for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; }     \
 } while (0)
 
 #define BS_ROOM(bs,pos)            (bs)[pos / BITS_IN_ROOM]
@@ -603,7 +495,7 @@ enum OpCode {
   OP_BACKREFN_IC,
   OP_BACKREF_MULTI,
   OP_BACKREF_MULTI_IC,
-  OP_BACKREF_AT_LEVEL,    /* \k<xxx+n>, \k<xxx-n> */
+  OP_BACKREF_WITH_LEVEL,    /* \k<xxx+n>, \k<xxx-n> */
 
   OP_MEMORY_START,
   OP_MEMORY_START_PUSH,   /* push back-tracker to stack */
@@ -612,9 +504,6 @@ enum OpCode {
   OP_MEMORY_END,
   OP_MEMORY_END_REC,      /* push marker to stack */
 
-  OP_SET_OPTION_PUSH,    /* set option and push recover option */
-  OP_SET_OPTION,         /* set option */
-
   OP_FAIL,               /* pop stack and move */
   OP_JUMP,
   OP_PUSH,
@@ -649,7 +538,11 @@ enum OpCode {
   OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump  */
   OP_STATE_CHECK,              /* check only */
   OP_STATE_CHECK_ANYCHAR_STAR,
-  OP_STATE_CHECK_ANYCHAR_ML_STAR
+  OP_STATE_CHECK_ANYCHAR_ML_STAR,
+
+  /* no need: IS_DYNAMIC_OPTION() == 0 */
+  OP_SET_OPTION_PUSH,    /* set option and push recover option */
+  OP_SET_OPTION          /* set option */
 };
 
 typedef int RelAddrType;
@@ -672,22 +565,6 @@ typedef void* PointerType;
 #define SIZE_POINTER          sizeof(PointerType)
 
 
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-
-#define PLATFORM_GET_INC(val,p,type) do{\
-  val  = *(type* )p;\
-  (p) += sizeof(type);\
-} while(0)
-
-#else
-
-#define PLATFORM_GET_INC(val,p,type) do{\
-  xmemcpy(&val, (p), sizeof(type));\
-  (p) += sizeof(type);\
-} while(0)
-
-#endif  /* PLATFORM_UNALIGNED_WORD_ACCESS */
-
 #define GET_RELADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, RelAddrType)
 #define GET_ABSADDR_INC(addr,p)    PLATFORM_GET_INC(addr,   p, AbsAddrType)
 #define GET_LENGTH_INC(len,p)      PLATFORM_GET_INC(len,    p, LengthType)
@@ -745,15 +622,15 @@ typedef void* PointerType;
 #define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
 #endif
 
-#define MC_ESC(enc)               (enc)->meta_char_table.esc
-#define MC_ANYCHAR(enc)           (enc)->meta_char_table.anychar
-#define MC_ANYTIME(enc)           (enc)->meta_char_table.anytime
-#define MC_ZERO_OR_ONE_TIME(enc)  (enc)->meta_char_table.zero_or_one_time
-#define MC_ONE_OR_MORE_TIME(enc)  (enc)->meta_char_table.one_or_more_time
-#define MC_ANYCHAR_ANYTIME(enc)   (enc)->meta_char_table.anychar_anytime
+#define MC_ESC(syn)               (syn)->meta_char_table.esc
+#define MC_ANYCHAR(syn)           (syn)->meta_char_table.anychar
+#define MC_ANYTIME(syn)           (syn)->meta_char_table.anytime
+#define MC_ZERO_OR_ONE_TIME(syn)  (syn)->meta_char_table.zero_or_one_time
+#define MC_ONE_OR_MORE_TIME(syn)  (syn)->meta_char_table.one_or_more_time
+#define MC_ANYCHAR_ANYTIME(syn)   (syn)->meta_char_table.anychar_anytime
 
-#define IS_MC_ESC_CODE(code, enc, syn) \
-  ((code) == MC_ESC(enc) && \
+#define IS_MC_ESC_CODE(code, syn) \
+  ((code) == MC_ESC(syn) && \
    !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
 
 
@@ -782,22 +659,96 @@ typedef void* PointerType;
     ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
     ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
 
+
+#define NCCLASS_FLAGS(cc)           ((cc)->flags)
+#define NCCLASS_FLAG_SET(cc,flag)    (NCCLASS_FLAGS(cc) |= (flag))
+#define NCCLASS_FLAG_CLEAR(cc,flag)  (NCCLASS_FLAGS(cc) &= ~(flag))
+#define IS_NCCLASS_FLAG_ON(cc,flag) ((NCCLASS_FLAGS(cc) & (flag)) != 0)
+
 /* cclass node */
-#define FLAG_CCLASS_NOT            1
-#define FLAG_CCLASS_SHARE         (1<<1)
+#define FLAG_NCCLASS_NOT           (1<<0)
+#define FLAG_NCCLASS_SHARE         (1<<1)
 
-#define CCLASS_SET_NOT(cc)      (cc)->flags |= FLAG_CCLASS_NOT
-#define CCLASS_CLEAR_NOT(cc)    (cc)->flags &= ~FLAG_CCLASS_NOT
-#define CCLASS_SET_SHARE(cc)    (cc)->flags |= FLAG_CCLASS_SHARE
-#define IS_CCLASS_NOT(cc)     (((cc)->flags & FLAG_CCLASS_NOT) != 0)
-#define IS_CCLASS_SHARE(cc)   (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
+#define NCCLASS_SET_NOT(nd)     NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
+#define NCCLASS_SET_SHARE(nd)   NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
+#define NCCLASS_CLEAR_NOT(nd)   NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
+#define IS_NCCLASS_NOT(nd)      IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
+#define IS_NCCLASS_SHARE(nd)    IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
 
 typedef struct {
-  int    flags;
+  int type;
+  /* struct _Node* next; */
+  /* unsigned int flags; */
+} NodeBase;
+
+typedef struct {
+  NodeBase base;
+  unsigned int flags;
   BitSet bs;
-  BBuf*  mbuf;     /* multi-byte info or NULL */
+  BBuf*  mbuf;   /* multi-byte info or NULL */
 } CClassNode;
 
+typedef long OnigStackIndex;
+
+typedef struct _OnigStackType {
+  unsigned int type;
+  union {
+    struct {
+      UChar *pcode;      /* byte code position */
+      UChar *pstr;       /* string position */
+      UChar *pstr_prev;  /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+      unsigned int state_check;
+#endif
+    } state;
+    struct {
+      int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
+      UChar *pcode;      /* byte code position (head of repeated target) */
+      int   num;         /* repeat id */
+    } repeat;
+    struct {
+      OnigStackIndex si;     /* index of stack */
+    } repeat_inc;
+    struct {
+      int num;           /* memory num */
+      UChar *pstr;       /* start/end position */
+      /* Following information is setted, if this stack type is MEM-START */
+      OnigStackIndex start;  /* prev. info (for backtrack  "(...)*" ) */
+      OnigStackIndex end;    /* prev. info (for backtrack  "(...)*" ) */
+    } mem;
+    struct {
+      int num;           /* null check id */
+      UChar *pstr;       /* start position */
+    } null_check;
+#ifdef USE_SUBEXP_CALL
+    struct {
+      UChar *ret_addr;   /* byte code position */
+      int    num;        /* null check id */
+      UChar *pstr;       /* string position */
+    } call_frame;
+#endif
+  } u;
+} OnigStackType;
+
+typedef struct {
+  void* stack_p;
+  int   stack_n;
+  OnigOptionType options;
+  OnigRegion*    region;
+  const UChar* start;   /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
+  UChar* best_s;
+#endif
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  void* state_check_buff;
+  int   state_check_buff_size;
+#endif
+} OnigMatchArg;
+
+
+#define IS_CODE_SB_WORD(enc,code) \
+  (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
 
 #ifdef ONIG_DEBUG
 
@@ -820,11 +771,38 @@ extern void onig_print_statistics P_((FILE* f));
 extern UChar* onig_error_code_to_format P_((int code));
 extern void  onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
 extern int  onig_bbuf_init P_((BBuf* buf, int size));
-extern int  onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
 extern int  onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
 extern void onig_chain_reduce P_((regex_t* reg));
 extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
 extern void onig_transfer P_((regex_t* to, regex_t* from));
 extern int  onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+extern int  onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
+
+/* strend hash */
+typedef void hash_table_type;
+typedef unsigned long hash_data_type;
+
+extern hash_table_type* onig_st_init_strend_table_with_size P_((int size));
+extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
+extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
+
+/* encoding property management */
+#define PROPERTY_LIST_ADD_PROP(Name, CR) \
+  r = onigenc_property_list_add_property((UChar* )Name, CR,\
+             &PropertyNameTable, &PropertyList, &PropertyListNum,\
+             &PropertyListSize);\
+  if (r != 0) goto end
+
+#define PROPERTY_LIST_INIT_CHECK \
+  if (PropertyInited == 0) {\
+    int r = onigenc_property_list_init(init_property_list);\
+    if (r != 0) return r;\
+  }
+
+extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
+
+typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
+
+extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
 
 #endif /* REGINT_H */
index af5c8593e6ea08be6c22e35991da7606723d144a..01131300c66a8697ad1f92376d3866c0ca1236c3 100644 (file)
@@ -2,7 +2,7 @@
   regparse.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  */
 
 #include "regparse.h"
+#include "st.h"
 
 #define WARN_BUFSIZE    256
 
+#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+
+
 OnigSyntaxType OnigSyntaxRuby = {
   (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
      ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
@@ -41,6 +45,8 @@ OnigSyntaxType OnigSyntaxRuby = {
       ONIG_SYN_OP2_OPTION_RUBY |
       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
+      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
+      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
       ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
       ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
       ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
@@ -54,25 +60,20 @@ OnigSyntaxType OnigSyntaxRuby = {
       ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
       ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
   , ONIG_OPTION_NONE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
 
-extern void onig_null_warn(const char* s) { }
-
-#ifdef RUBY_PLATFORM
-extern void
-onig_rb_warn(const char* s)
-{
-  rb_warn("%s", s);
-}
-
-extern void
-onig_rb_warning(const char* s)
-{
-  rb_warning("%s", s);
-}
-#endif
+extern void onig_null_warn(const char* s ARG_UNUSED) { }
 
 #ifdef DEFAULT_WARN_FUNCTION
 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
@@ -112,7 +113,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
   BBuf *to;
 
   *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
-  CHECK_NULL_RETURN_VAL(to, ONIGERR_MEMORY);
+  CHECK_NULL_RETURN_MEMERR(to);
   r = BBUF_INIT(to, from->alloc);
   if (r != 0) return r;
   to->used = from->used;
@@ -120,6 +121,9 @@ bbuf_clone(BBuf** rto, BBuf* from)
   return 0;
 }
 
+#define BACKREF_REL_TO_ABS(rel_no, env) \
+  ((env)->num_mem + 1 + (rel_no))
+
 #define ONOFF(v,f,negative)    (negative) ? ((v) &= ~(f)) : ((v) |= (f))
 
 #define MBCODE_START_POS(enc) \
@@ -139,7 +143,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
 #define BITSET_IS_EMPTY(bs,empty) do {\
   int i;\
   empty = 1;\
-  for (i = 0; i < BITSET_SIZE; i++) {\
+  for (i = 0; i < (int )BITSET_SIZE; i++) {\
     if ((bs)[i] != 0) {\
       empty = 0; break;\
     }\
@@ -160,9 +164,7 @@ static void
 bitset_set_all(BitSetRef bs)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
-    bs[i] = ~((Bits )0);
-  }
+  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
 }
 #endif
 
@@ -170,45 +172,35 @@ static void
 bitset_invert(BitSetRef bs)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
-    bs[i] = ~(bs[i]);
-  }
+  for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
 }
 
 static void
 bitset_invert_to(BitSetRef from, BitSetRef to)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
-    to[i] = ~(from[i]);
-  }
+  for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
 }
 
 static void
 bitset_and(BitSetRef dest, BitSetRef bs)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
-    dest[i] &= bs[i];
-  }
+  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
 }
 
 static void
 bitset_or(BitSetRef dest, BitSetRef bs)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
-    dest[i] |= bs[i];
-  }
+  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
 }
 
 static void
 bitset_copy(BitSetRef dest, BitSetRef bs)
 {
   int i;
-  for (i = 0; i < BITSET_SIZE; i++) {
-    dest[i] = bs[i];
-  }
+  for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
 }
 
 extern int
@@ -223,8 +215,8 @@ onig_strncmp(const UChar* s1, const UChar* s2, int n)
   return 0;
 }
 
-static void
-k_strcpy(UChar* dest, const UChar* src, const UChar* end)
+extern void
+onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
 {
   int len = end - src;
   if (len > 0) {
@@ -233,6 +225,7 @@ k_strcpy(UChar* dest, const UChar* src, const UChar* end)
   }
 }
 
+#ifdef USE_NAMED_GROUP
 static UChar*
 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
 {
@@ -251,7 +244,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
 
   return r;
 }
-
+#endif
 
 /* scan pattern methods */
 #define PEND_VALUE   0
@@ -273,7 +266,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
 #define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )c)
 
 static UChar*
-k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
+strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
              int capa)
 {
   UChar* r;
@@ -284,7 +277,7 @@ k_strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_e
     r = (UChar* )xmalloc(capa + 1);
 
   CHECK_NULL_RETURN(r);
-  k_strcpy(r + (dest_end - dest), src, src_end);
+  onig_strcpy(r + (dest_end - dest), src, src_end);
   return r;
 }
 
@@ -297,78 +290,23 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end,
 
   r = (UChar* )xmalloc(capa + 1);
   CHECK_NULL_RETURN(r);
-  k_strcpy(r, dest, dest_end);
-  k_strcpy(r + (dest_end - dest), src, src_end);
+  onig_strcpy(r, dest, dest_end);
+  onig_strcpy(r + (dest_end - dest), src, src_end);
   return r;
 }
 
-#ifdef USE_NAMED_GROUP
-
-#define INIT_NAME_BACKREFS_ALLOC_NUM   8
-
-typedef struct {
-  UChar* name;
-  int    name_len;   /* byte length */
-  int    back_num;   /* number of backrefs */
-  int    back_alloc;
-  int    back_ref1;
-  int*   back_refs;
-} NameEntry;
-
-#ifdef USE_ST_HASH_TABLE
 
-#include "st.h"
+#ifdef USE_ST_LIBRARY
 
 typedef struct {
-  unsigned char* s;
-  unsigned char* end;
-} st_strend_key;
-
-static int strend_cmp(st_strend_key*, st_strend_key*);
-static int strend_hash(st_strend_key*);
-
-static struct st_hash_type type_strend_hash = {
-  strend_cmp,
-  strend_hash,
-};
-
-static st_table*
-onig_st_init_strend_table_with_size(int size)
-{
-    return onig_st_init_table_with_size(&type_strend_hash, size);
-}
-
-static int
-onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
-{
-    st_strend_key key;
-
-    key.s   = (unsigned char* )str_key;
-    key.end = (unsigned char* )end_key;
-
-    return onig_st_lookup(table, (st_data_t )(&key), value);
-}
-
-static int
-onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
-{
-  st_strend_key* key;
-  int result;
-
-  key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
-  key->s   = (unsigned char* )str_key;
-  key->end = (unsigned char* )end_key;
-  result = onig_st_insert(table, (st_data_t )key, value);
-  if (result) {
-    xfree(key);
-  }
-  return result;
-}
+  UChar* s;
+  UChar* end;
+} st_str_end_key;
 
 static int
-strend_cmp(st_strend_key* x, st_strend_key* y)
+str_end_cmp(st_str_end_key* x, st_str_end_key* y)
 {
-  unsigned char *p, *q;
+  UChar *p, *q;
   int c;
 
   if ((x->end - x->s) != (y->end - y->s))
@@ -387,12 +325,11 @@ strend_cmp(st_strend_key* x, st_strend_key* y)
 }
 
 static int
-strend_hash(st_strend_key* x)
+str_end_hash(st_str_end_key* x)
 {
-  int val;
-  unsigned char *p;
+  UChar *p;
+  int val = 0;
 
-  val = 0;
   p = x->s;
   while (p < x->end) {
     val = val * 997 + (int )*p++;
@@ -401,6 +338,65 @@ strend_hash(st_strend_key* x)
   return val + (val >> 5);
 }
 
+extern hash_table_type*
+onig_st_init_strend_table_with_size(int size)
+{
+  static struct st_hash_type hashType = {
+    str_end_cmp,
+    str_end_hash,
+  };
+
+  return (hash_table_type* )
+           onig_st_init_table_with_size(&hashType, size);
+}
+
+extern int
+onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
+                     const UChar* end_key, hash_data_type *value)
+{
+  st_str_end_key key;
+
+  key.s   = (UChar* )str_key;
+  key.end = (UChar* )end_key;
+
+  return onig_st_lookup(table, (st_data_t )(&key), value);
+}
+
+extern int
+onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
+                     const UChar* end_key, hash_data_type value)
+{
+  st_str_end_key* key;
+  int result;
+
+  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
+  key->s   = (UChar* )str_key;
+  key->end = (UChar* )end_key;
+  result = onig_st_insert(table, (st_data_t )key, value);
+  if (result) {
+    xfree(key);
+  }
+  return result;
+}
+
+#endif /* USE_ST_LIBRARY */
+
+
+#ifdef USE_NAMED_GROUP
+
+#define INIT_NAME_BACKREFS_ALLOC_NUM   8
+
+typedef struct {
+  UChar* name;
+  int    name_len;   /* byte length */
+  int    back_num;   /* number of backrefs */
+  int    back_alloc;
+  int    back_ref1;
+  int*   back_refs;
+} NameEntry;
+
+#ifdef USE_ST_LIBRARY
+
 typedef st_table  NameTable;
 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
 
@@ -441,10 +437,10 @@ onig_print_names(FILE* fp, regex_t* reg)
   }
   return 0;
 }
-#endif
+#endif /* ONIG_DEBUG */
 
 static int
-i_free_name_entry(UChar* key, NameEntry* e, void* arg)
+i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
 {
   xfree(e->name);
   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
@@ -501,10 +497,9 @@ typedef struct {
 } INamesArg;
 
 static int
-i_names(UChar* key, NameEntry* e, INamesArg* arg)
+i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
 {
   int r = (*(arg->func))(e->name,
-                   /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
                          e->name + e->name_len,
                          e->back_num,
                         (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
@@ -518,8 +513,7 @@ i_names(UChar* key, NameEntry* e, INamesArg* arg)
 
 extern int
 onig_foreach_name(regex_t* reg,
-          int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
-          void* arg)
+  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
 {
   INamesArg narg;
   NameTable* t = (NameTable* )reg->name_table;
@@ -536,7 +530,7 @@ onig_foreach_name(regex_t* reg,
 }
 
 static int
-i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
+i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
 {
   int i;
 
@@ -575,7 +569,7 @@ onig_number_of_names(regex_t* reg)
     return 0;
 }
 
-#else  /* USE_ST_HASH_TABLE */
+#else  /* USE_ST_LIBRARY */
 
 #define INIT_NAMES_ALLOC_NUM    8
 
@@ -585,7 +579,6 @@ typedef struct {
   int        alloc;
 } NameTable;
 
-
 #ifdef ONIG_DEBUG
 extern int
 onig_print_names(FILE* fp, regex_t* reg)
@@ -683,8 +676,7 @@ name_find(regex_t* reg, UChar* name, UChar* name_end)
 
 extern int
 onig_foreach_name(regex_t* reg,
-          int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
-          void* arg)
+  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
 {
   int i, r;
   NameEntry* e;
@@ -713,7 +705,7 @@ onig_number_of_names(regex_t* reg)
     return 0;
 }
 
-#endif /* else USE_ST_HASH_TABLE */
+#endif /* else USE_ST_LIBRARY */
 
 static int
 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
@@ -727,16 +719,18 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
 
   e = name_find(reg, name, name_end);
   if (IS_NULL(e)) {
-#ifdef USE_ST_HASH_TABLE
+#ifdef USE_ST_LIBRARY
     if (IS_NULL(t)) {
       t = onig_st_init_strend_table_with_size(5);
       reg->name_table = (void* )t;
     }
     e = (NameEntry* )xmalloc(sizeof(NameEntry));
-    CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(e);
 
     e->name = strdup_with_null(reg->enc, name, name_end);
-    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
+    if (IS_NULL(e->name)) {
+      xfree(e);  return ONIGERR_MEMORY;
+    }
     onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
                           (HashDataType )e);
 
@@ -750,7 +744,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
     if (IS_NULL(t)) {
       alloc = INIT_NAMES_ALLOC_NUM;
       t = (NameTable* )xmalloc(sizeof(NameTable));
-      CHECK_NULL_RETURN_VAL(t, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(t);
       t->e     = NULL;
       t->alloc = 0;
       t->num   = 0;
@@ -769,7 +763,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
 
       alloc = t->alloc * 2;
       t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
-      CHECK_NULL_RETURN_VAL(t->e, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(t->e);
       t->alloc = alloc;
 
     clear:
@@ -784,6 +778,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
     e = &(t->e[t->num]);
     t->num++;
     e->name = strdup_with_null(reg->enc, name, name_end);
+    if (IS_NULL(e->name)) return ONIGERR_MEMORY;
     e->name_len = name_end - name;
 #endif
   }
@@ -803,7 +798,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
     if (e->back_num == 2) {
       alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
       e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
-      CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(e->back_refs);
       e->back_alloc = alloc;
       e->back_refs[0] = e->back_ref1;
       e->back_refs[1] = backref;
@@ -812,7 +807,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
       if (e->back_num > e->back_alloc) {
        alloc = e->back_alloc * 2;
        e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
-       CHECK_NULL_RETURN_VAL(e->back_refs, ONIGERR_MEMORY);
+       CHECK_NULL_RETURN_MEMERR(e->back_refs);
        e->back_alloc = alloc;
       }
       e->back_refs[e->back_num - 1] = backref;
@@ -826,9 +821,8 @@ extern int
 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
                           const UChar* name_end, int** nums)
 {
-  NameEntry* e;
+  NameEntry* e = name_find(reg, name, name_end);
 
-  e = name_find(reg, name, name_end);
   if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
 
   switch (e->back_num) {
@@ -886,8 +880,7 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
 
 extern int
 onig_foreach_name(regex_t* reg,
-          int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*),
-          void* arg)
+  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
 {
   return ONIG_NO_SUPPORT_CONFIG;
 }
@@ -928,12 +921,12 @@ scan_env_clear(ScanEnv* env)
   BIT_STATUS_CLEAR(env->bt_mem_start);
   BIT_STATUS_CLEAR(env->bt_mem_end);
   BIT_STATUS_CLEAR(env->backrefed_mem);
-  env->error             = (UChar* )NULL;
-  env->error_end         = (UChar* )NULL;
-  env->num_call          = 0;
-  env->num_mem           = 0;
+  env->error      = (UChar* )NULL;
+  env->error_end  = (UChar* )NULL;
+  env->num_call   = 0;
+  env->num_mem    = 0;
 #ifdef USE_NAMED_GROUP
-  env->num_named         = 0;
+  env->num_named  = 0;
 #endif
   env->mem_alloc         = 0;
   env->mem_nodes_dynamic = (Node** )NULL;
@@ -968,7 +961,7 @@ scan_env_add_mem_entry(ScanEnv* env)
        alloc = env->mem_alloc * 2;
        p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
       }
-      CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(p);
 
       for (i = env->num_mem + 1; i < alloc; i++)
        p[i] = NULL_NODE;
@@ -993,7 +986,7 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
 }
 
 
-#ifdef USE_RECYCLE_NODE
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
 typedef struct _FreeNode {
   struct _FreeNode* next;
 } FreeNode;
@@ -1008,20 +1001,20 @@ onig_node_free(Node* node)
   if (IS_NULL(node)) return ;
 
   switch (NTYPE(node)) {
-  case N_STRING:
-    if (IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
-      xfree(NSTRING(node).s);
+  case NT_STR:
+    if (NSTR(node)->capa != 0 &&
+       IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
+      xfree(NSTR(node)->s);
     }
     break;
 
-  case N_LIST:
-  case N_ALT:
-    onig_node_free(NCONS(node).left);
-    /* onig_node_free(NCONS(node).right); */
+  case NT_LIST:
+  case NT_ALT:
+    onig_node_free(NCAR(node));
     {
-      Node* next_node = NCONS(node).right;
+      Node* next_node = NCDR(node);
 
-#ifdef USE_RECYCLE_NODE
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
       {
        FreeNode* n = (FreeNode* )node;
 
@@ -1033,46 +1026,43 @@ onig_node_free(Node* node)
 #else
       xfree(node);
 #endif
-
       node = next_node;
       goto start;
     }
     break;
 
-  case N_CCLASS:
+  case NT_CCLASS:
     {
-      CClassNode* cc = &(NCCLASS(node));
-
-      if (IS_CCLASS_SHARE(cc))
-        return ;
+      CClassNode* cc = NCCLASS(node);
 
+      if (IS_NCCLASS_SHARE(cc)) return ;
       if (cc->mbuf)
         bbuf_free(cc->mbuf);
     }
     break;
 
-  case N_QUANTIFIER:
-    if (NQUANTIFIER(node).target)
-      onig_node_free(NQUANTIFIER(node).target);
+  case NT_QTFR:
+    if (NQTFR(node)->target)
+      onig_node_free(NQTFR(node)->target);
     break;
 
-  case N_EFFECT:
-    if (NEFFECT(node).target)
-      onig_node_free(NEFFECT(node).target);
+  case NT_ENCLOSE:
+    if (NENCLOSE(node)->target)
+      onig_node_free(NENCLOSE(node)->target);
     break;
 
-  case N_BACKREF:
-    if (IS_NOT_NULL(NBACKREF(node).back_dynamic))
-      xfree(NBACKREF(node).back_dynamic);
+  case NT_BREF:
+    if (IS_NOT_NULL(NBREF(node)->back_dynamic))
+      xfree(NBREF(node)->back_dynamic);
     break;
 
-  case N_ANCHOR:
-    if (NANCHOR(node).target)
-      onig_node_free(NANCHOR(node).target);
+  case NT_ANCHOR:
+    if (NANCHOR(node)->target)
+      onig_node_free(NANCHOR(node)->target);
     break;
   }
 
-#ifdef USE_RECYCLE_NODE
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
   {
     FreeNode* n = (FreeNode* )node;
 
@@ -1086,7 +1076,7 @@ onig_node_free(Node* node)
 #endif
 }
 
-#ifdef USE_RECYCLE_NODE
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
 extern int
 onig_free_node_list(void)
 {
@@ -1108,7 +1098,7 @@ node_new(void)
 {
   Node* node;
 
-#ifdef USE_RECYCLE_NODE
+#ifdef USE_PARSE_TREE_NODE_RECYCLE
   THREAD_ATOMIC_START;
   if (IS_NOT_NULL(FreeNodeList)) {
     node = (Node* )FreeNodeList;
@@ -1120,6 +1110,7 @@ node_new(void)
 #endif
 
   node = (Node* )xmalloc(sizeof(Node));
+  /* xmemset(node, 0, sizeof(Node)); */
   return node;
 }
 
@@ -1128,6 +1119,7 @@ static void
 initialize_cclass(CClassNode* cc)
 {
   BITSET_CLEAR(cc->bs);
+  /* cc->base.flags = 0; */
   cc->flags = 0;
   cc->mbuf  = NULL;
 }
@@ -1137,53 +1129,55 @@ node_new_cclass(void)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_CCLASS;
 
-  initialize_cclass(&(NCCLASS(node)));
+  SET_NTYPE(node, NT_CCLASS);
+  initialize_cclass(NCCLASS(node));
   return node;
 }
 
 static Node*
-node_new_cclass_by_codepoint_range(int not,
-                   const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
+                                  const OnigCodePoint ranges[])
 {
+  int n, i;
   CClassNode* cc;
-  int n, i, j;
+  OnigCodePoint j;
 
-  Node* node = node_new();
+  Node* node = node_new_cclass();
   CHECK_NULL_RETURN(node);
-  node->type = N_CCLASS;
 
-  cc = &(NCCLASS(node));
-  cc->flags = 0;
-  if (not != 0) CCLASS_SET_NOT(cc);
+  cc = NCCLASS(node);
+  if (not != 0) NCCLASS_SET_NOT(cc);
 
   BITSET_CLEAR(cc->bs);
-  if (IS_NOT_NULL(sbr)) {
-    n = ONIGENC_CODE_RANGE_NUM(sbr);
+  if (sb_out > 0 && IS_NOT_NULL(ranges)) {
+    n = ONIGENC_CODE_RANGE_NUM(ranges);
     for (i = 0; i < n; i++) {
-      for (j  = ONIGENC_CODE_RANGE_FROM(sbr, i);
-           j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+      for (j  = ONIGENC_CODE_RANGE_FROM(ranges, i);
+           j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
+       if (j >= sb_out) goto sb_end;
+
         BITSET_SET_BIT(cc->bs, j);
       }
     }
   }
 
-  if (IS_NULL(mbr)) {
+ sb_end:
+  if (IS_NULL(ranges)) {
   is_null:
     cc->mbuf = NULL;
   }
   else {
     BBuf* bbuf;
 
-    n = ONIGENC_CODE_RANGE_NUM(mbr);
+    n = ONIGENC_CODE_RANGE_NUM(ranges);
     if (n == 0) goto is_null;
 
     bbuf = (BBuf* )xmalloc(sizeof(BBuf));
-    CHECK_NULL_RETURN_VAL(bbuf, NULL);
+    CHECK_NULL_RETURN(bbuf);
     bbuf->alloc = n + 1;
     bbuf->used  = n + 1;
-    bbuf->p     = (UChar* )((void* )mbr);
+    bbuf->p     = (UChar* )((void* )ranges);
 
     cc->mbuf = bbuf;
   }
@@ -1192,12 +1186,14 @@ node_new_cclass_by_codepoint_range(int not,
 }
 
 static Node*
-node_new_ctype(int type)
+node_new_ctype(int type, int not)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_CTYPE;
-  NCTYPE(node).type = type;
+
+  SET_NTYPE(node, NT_CTYPE);
+  NCTYPE(node)->ctype = type;
+  NCTYPE(node)->not   = not;
   return node;
 }
 
@@ -1206,7 +1202,8 @@ node_new_anychar(void)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_ANYCHAR;
+
+  SET_NTYPE(node, NT_CANY);
   return node;
 }
 
@@ -1215,9 +1212,10 @@ node_new_list(Node* left, Node* right)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_LIST;
-  NCONS(node).left  = left;
-  NCONS(node).right = right;
+
+  SET_NTYPE(node, NT_LIST);
+  NCAR(node)  = left;
+  NCDR(node) = right;
   return node;
 }
 
@@ -1227,14 +1225,33 @@ onig_node_new_list(Node* left, Node* right)
   return node_new_list(left, right);
 }
 
-static Node*
-node_new_alt(Node* left, Node* right)
+extern Node*
+onig_node_list_add(Node* list, Node* x)
+{
+  Node *n;
+
+  n = onig_node_new_list(x, NULL);
+  if (IS_NULL(n)) return NULL_NODE;
+
+  if (IS_NOT_NULL(list)) {
+    while (IS_NOT_NULL(NCDR(list)))
+      list = NCDR(list);
+
+    NCDR(list) = n;
+  }
+
+  return n;
+}
+
+extern Node*
+onig_node_new_alt(Node* left, Node* right)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_ALT;
-  NCONS(node).left  = left;
-  NCONS(node).right = right;
+
+  SET_NTYPE(node, NT_ALT);
+  NCAR(node)  = left;
+  NCDR(node) = right;
   return node;
 }
 
@@ -1243,16 +1260,17 @@ onig_node_new_anchor(int type)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_ANCHOR;
-  NANCHOR(node).type     = type;
-  NANCHOR(node).target   = NULL;
-  NANCHOR(node).char_len = -1;
+
+  SET_NTYPE(node, NT_ANCHOR);
+  NANCHOR(node)->type     = type;
+  NANCHOR(node)->target   = NULL;
+  NANCHOR(node)->char_len = -1;
   return node;
 }
 
 static Node*
 node_new_backref(int back_num, int* backrefs, int by_name,
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
                 int exist_level, int nest_level,
 #endif
                 ScanEnv* env)
@@ -1261,31 +1279,32 @@ node_new_backref(int back_num, int* backrefs, int by_name,
   Node* node = node_new();
 
   CHECK_NULL_RETURN(node);
-  node->type = N_BACKREF;
-  NBACKREF(node).state    = 0;
-  NBACKREF(node).back_num = back_num;
-  NBACKREF(node).back_dynamic = (int* )NULL;
+
+  SET_NTYPE(node, NT_BREF);
+  NBREF(node)->state    = 0;
+  NBREF(node)->back_num = back_num;
+  NBREF(node)->back_dynamic = (int* )NULL;
   if (by_name != 0)
-    NBACKREF(node).state |= NST_NAME_REF;
+    NBREF(node)->state |= NST_NAME_REF;
 
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
   if (exist_level != 0) {
-    NBACKREF(node).state |= NST_NEST_LEVEL;
-    NBACKREF(node).nest_level  = nest_level;
+    NBREF(node)->state |= NST_NEST_LEVEL;
+    NBREF(node)->nest_level  = nest_level;
   }
 #endif
 
   for (i = 0; i < back_num; i++) {
     if (backrefs[i] <= env->num_mem &&
        IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
-      NBACKREF(node).state |= NST_RECURSION;   /* /...(\1).../ */
+      NBREF(node)->state |= NST_RECURSION;   /* /...(\1).../ */
       break;
     }
   }
 
   if (back_num <= NODE_BACKREFS_SIZE) {
     for (i = 0; i < back_num; i++)
-      NBACKREF(node).back_static[i] = backrefs[i];
+      NBREF(node)->back_static[i] = backrefs[i];
   }
   else {
     int* p = (int* )xmalloc(sizeof(int) * back_num);
@@ -1293,7 +1312,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,
       onig_node_free(node);
       return NULL;
     }
-    NBACKREF(node).back_dynamic = p;
+    NBREF(node)->back_dynamic = p;
     for (i = 0; i < back_num; i++)
       p[i] = backrefs[i];
   }
@@ -1302,17 +1321,17 @@ node_new_backref(int back_num, int* backrefs, int by_name,
 
 #ifdef USE_SUBEXP_CALL
 static Node*
-node_new_call(UChar* name, UChar* name_end)
+node_new_call(UChar* name, UChar* name_end, int gnum)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
 
-  node->type = N_CALL;
-  NCALL(node).state    = 0;
-  NCALL(node).ref_num  = CALLNODE_REFNUM_UNDEF;
-  NCALL(node).target   = NULL_NODE;
-  NCALL(node).name     = name;
-  NCALL(node).name_end = name_end;
+  SET_NTYPE(node, NT_CALL);
+  NCALL(node)->state     = 0;
+  NCALL(node)->target    = NULL_NODE;
+  NCALL(node)->name      = name;
+  NCALL(node)->name_end  = name_end;
+  NCALL(node)->group_num = gnum;  /* call by number if gnum != 0 */
   return node;
 }
 #endif
@@ -1322,58 +1341,60 @@ node_new_quantifier(int lower, int upper, int by_number)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_QUANTIFIER;
-  NQUANTIFIER(node).state  = 0;
-  NQUANTIFIER(node).target = NULL;
-  NQUANTIFIER(node).lower  = lower;
-  NQUANTIFIER(node).upper  = upper;
-  NQUANTIFIER(node).greedy = 1;
-  NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
-  NQUANTIFIER(node).head_exact        = NULL_NODE;
-  NQUANTIFIER(node).next_head_exact   = NULL_NODE;
-  NQUANTIFIER(node).is_refered        = 0;
+
+  SET_NTYPE(node, NT_QTFR);
+  NQTFR(node)->state  = 0;
+  NQTFR(node)->target = NULL;
+  NQTFR(node)->lower  = lower;
+  NQTFR(node)->upper  = upper;
+  NQTFR(node)->greedy = 1;
+  NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+  NQTFR(node)->head_exact        = NULL_NODE;
+  NQTFR(node)->next_head_exact   = NULL_NODE;
+  NQTFR(node)->is_refered        = 0;
   if (by_number != 0)
-    NQUANTIFIER(node).state |= NST_BY_NUMBER;
+    NQTFR(node)->state |= NST_BY_NUMBER;
 
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
-  NQUANTIFIER(node).comb_exp_check_num = 0;
+  NQTFR(node)->comb_exp_check_num = 0;
 #endif
 
   return node;
 }
 
 static Node*
-node_new_effect(int type)
+node_new_enclose(int type)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_EFFECT;
-  NEFFECT(node).type      = type;
-  NEFFECT(node).state     =  0;
-  NEFFECT(node).regnum    =  0;
-  NEFFECT(node).option    =  0;
-  NEFFECT(node).target    = NULL;
-  NEFFECT(node).call_addr = -1;
-  NEFFECT(node).opt_count =  0;
+
+  SET_NTYPE(node, NT_ENCLOSE);
+  NENCLOSE(node)->type      = type;
+  NENCLOSE(node)->state     =  0;
+  NENCLOSE(node)->regnum    =  0;
+  NENCLOSE(node)->option    =  0;
+  NENCLOSE(node)->target    = NULL;
+  NENCLOSE(node)->call_addr = -1;
+  NENCLOSE(node)->opt_count =  0;
   return node;
 }
 
 extern Node*
-onig_node_new_effect(int type)
+onig_node_new_enclose(int type)
 {
-  return node_new_effect(type);
+  return node_new_enclose(type);
 }
 
 static Node*
-node_new_effect_memory(OnigOptionType option, int is_named)
+node_new_enclose_memory(OnigOptionType option, int is_named)
 {
-  Node* node = node_new_effect(EFFECT_MEMORY);
+  Node* node = node_new_enclose(ENCLOSE_MEMORY);
   CHECK_NULL_RETURN(node);
   if (is_named != 0)
-    SET_EFFECT_STATUS(node, NST_NAMED_GROUP);
+    SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
 
 #ifdef USE_SUBEXP_CALL
-  NEFFECT(node).option = option;
+  NENCLOSE(node)->option = option;
 #endif
   return node;
 }
@@ -1381,9 +1402,9 @@ node_new_effect_memory(OnigOptionType option, int is_named)
 static Node*
 node_new_option(OnigOptionType option)
 {
-  Node* node = node_new_effect(EFFECT_OPTION);
+  Node* node = node_new_enclose(ENCLOSE_OPTION);
   CHECK_NULL_RETURN(node);
-  NEFFECT(node).option = option;
+  NENCLOSE(node)->option = option;
   return node;
 }
 
@@ -1393,36 +1414,43 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
   int addlen = end - s;
 
   if (addlen > 0) {
-    int len  = NSTRING(node).end - NSTRING(node).s;
+    int len  = NSTR(node)->end - NSTR(node)->s;
 
-    if (NSTRING(node).capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
+    if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
       UChar* p;
       int capa = len + addlen + NODE_STR_MARGIN;
 
-      if (capa <= NSTRING(node).capa) {
-       k_strcpy(NSTRING(node).s + len, s, end);
+      if (capa <= NSTR(node)->capa) {
+       onig_strcpy(NSTR(node)->s + len, s, end);
       }
       else {
-       if (NSTRING(node).s == NSTRING(node).buf)
-         p = strcat_capa_from_static(NSTRING(node).s, NSTRING(node).end,
+       if (NSTR(node)->s == NSTR(node)->buf)
+         p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
                                      s, end, capa);
        else
-         p = k_strcat_capa(NSTRING(node).s, NSTRING(node).end, s, end, capa);
+         p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
 
-       CHECK_NULL_RETURN_VAL(p, ONIGERR_MEMORY);
-       NSTRING(node).s    = p;
-       NSTRING(node).capa = capa;
+       CHECK_NULL_RETURN_MEMERR(p);
+       NSTR(node)->s    = p;
+       NSTR(node)->capa = capa;
       }
     }
     else {
-      k_strcpy(NSTRING(node).s + len, s, end);
+      onig_strcpy(NSTR(node)->s + len, s, end);
     }
-    NSTRING(node).end = NSTRING(node).s + len + addlen;
+    NSTR(node)->end = NSTR(node)->s + len + addlen;
   }
 
   return 0;
 }
 
+extern int
+onig_node_str_set(Node* node, const UChar* s, const UChar* end)
+{
+  onig_node_str_clear(node);
+  return onig_node_str_cat(node, s, end);
+}
+
 static int
 node_str_cat_char(Node* node, UChar c)
 {
@@ -1435,26 +1463,25 @@ node_str_cat_char(Node* node, UChar c)
 extern void
 onig_node_conv_to_str_node(Node* node, int flag)
 {
-  node->type = N_STRING;
-
-  NSTRING(node).flag = flag;
-  NSTRING(node).capa = 0;
-  NSTRING(node).s    = NSTRING(node).buf;
-  NSTRING(node).end  = NSTRING(node).buf;
+  SET_NTYPE(node, NT_STR);
+  NSTR(node)->flag = flag;
+  NSTR(node)->capa = 0;
+  NSTR(node)->s    = NSTR(node)->buf;
+  NSTR(node)->end  = NSTR(node)->buf;
 }
 
 extern void
 onig_node_str_clear(Node* node)
 {
-  if (NSTRING(node).capa != 0 &&
-      IS_NOT_NULL(NSTRING(node).s) && NSTRING(node).s != NSTRING(node).buf) {
-    xfree(NSTRING(node).s);
+  if (NSTR(node)->capa != 0 &&
+      IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
+    xfree(NSTR(node)->s);
   }
 
-  NSTRING(node).capa = 0;
-  NSTRING(node).flag = 0;
-  NSTRING(node).s    = NSTRING(node).buf;
-  NSTRING(node).end  = NSTRING(node).buf;
+  NSTR(node)->capa = 0;
+  NSTR(node)->flag = 0;
+  NSTR(node)->s    = NSTR(node)->buf;
+  NSTR(node)->end  = NSTR(node)->buf;
 }
 
 static Node*
@@ -1463,11 +1490,11 @@ node_new_str(const UChar* s, const UChar* end)
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
 
-  node->type = N_STRING;
-  NSTRING(node).capa = 0;
-  NSTRING(node).flag = 0;
-  NSTRING(node).s    = NSTRING(node).buf;
-  NSTRING(node).end  = NSTRING(node).buf;
+  SET_NTYPE(node, NT_STR);
+  NSTR(node)->capa = 0;
+  NSTR(node)->flag = 0;
+  NSTR(node)->s    = NSTR(node)->buf;
+  NSTR(node)->end  = NSTR(node)->buf;
   if (onig_node_str_cat(node, s, end)) {
     onig_node_free(node);
     return NULL;
@@ -1481,7 +1508,6 @@ onig_node_new_str(const UChar* s, const UChar* end)
   return node_new_str(s, end);
 }
 
-#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
 static Node*
 node_new_str_raw(UChar* s, UChar* end)
 {
@@ -1489,7 +1515,6 @@ node_new_str_raw(UChar* s, UChar* end)
   NSTRING_SET_RAW(node);
   return node;
 }
-#endif
 
 static Node*
 node_new_empty(void)
@@ -1498,12 +1523,12 @@ node_new_empty(void)
 }
 
 static Node*
-node_new_str_char(UChar c)
+node_new_str_raw_char(UChar c)
 {
   UChar p[1];
 
   p[0] = c;
-  return node_new_str(p, p + 1);
+  return node_new_str_raw(p, p + 1);
 }
 
 static Node*
@@ -1528,7 +1553,7 @@ static int
 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
 {
   if (sn->end > sn->s) {
-    return ((enc_len(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
+    return ((enclen(enc, sn->s) < sn->end - sn->s)  ?  1 : 0);
   }
   return 0;
 }
@@ -1651,7 +1676,7 @@ new_code_range(BBuf** pbuf)
   BBuf* bbuf;
 
   bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
-  CHECK_NULL_RETURN_VAL(*pbuf, ONIGERR_MEMORY);
+  CHECK_NULL_RETURN_MEMERR(*pbuf);
   r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
   if (r) return r;
 
@@ -1952,10 +1977,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
   BitSetRef bsr1, bsr2;
   BitSet bs1, bs2;
 
-  not1 = IS_CCLASS_NOT(dest);
+  not1 = IS_NCCLASS_NOT(dest);
   bsr1 = dest->bs;
   buf1 = dest->mbuf;
-  not2 = IS_CCLASS_NOT(cc);
+  not2 = IS_NCCLASS_NOT(cc);
   bsr2 = cc->bs;
   buf2 = cc->mbuf;
 
@@ -2010,10 +2035,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
   BitSetRef bsr1, bsr2;
   BitSet bs1, bs2;
 
-  not1 = IS_CCLASS_NOT(dest);
+  not1 = IS_NCCLASS_NOT(dest);
   bsr1 = dest->bs;
   buf1 = dest->mbuf;
-  not2 = IS_CCLASS_NOT(cc);
+  not2 = IS_NCCLASS_NOT(cc);
   bsr2 = cc->bs;
   buf2 = cc->mbuf;
 
@@ -2066,13 +2091,13 @@ conv_backslash_value(int c, ScanEnv* env)
 {
   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
     switch (c) {
-    case 'n':  return '\n';
-    case 't':  return '\t';
-    case 'r':  return '\r';
-    case 'f':  return '\f';
-    case 'a':  return '\007';
-    case 'b':  return '\010';
-    case 'e':  return '\033';
+    case 'n': return '\n';
+    case 't': return '\t';
+    case 'r': return '\r';
+    case 'f': return '\f';
+    case 'a': return '\007';
+    case 'b': return '\010';
+    case 'e': return '\033';
     case 'v':
       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
        return '\v';
@@ -2089,26 +2114,26 @@ static int
 is_invalid_quantifier_target(Node* node)
 {
   switch (NTYPE(node)) {
-  case N_ANCHOR:
+  case NT_ANCHOR:
     return 1;
     break;
 
-  case N_EFFECT:
-    if (NEFFECT(node).type == EFFECT_OPTION)
-      return is_invalid_quantifier_target(NEFFECT(node).target);
+  case NT_ENCLOSE:
+    /* allow enclosed elements */
+    /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
     break;
 
-  case N_LIST: /* ex. (?:\G\A)* */
+  case NT_LIST:
     do {
-      if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
-    } while (IS_NOT_NULL(node = NCONS(node).right));
+      if (! is_invalid_quantifier_target(NCAR(node))) return 0;
+    } while (IS_NOT_NULL(node = NCDR(node)));
     return 0;
     break;
 
-  case N_ALT:  /* ex. (?:abc|\A)* */
+  case NT_ALT:
     do {
-      if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
-    } while (IS_NOT_NULL(node = NCONS(node).right));
+      if (is_invalid_quantifier_target(NCAR(node))) return 1;
+    } while (IS_NOT_NULL(node = NCDR(node)));
     break;
 
   default:
@@ -2119,24 +2144,24 @@ is_invalid_quantifier_target(Node* node)
 
 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
 static int
-popular_quantifier_num(QuantifierNode* qf)
+popular_quantifier_num(QtfrNode* q)
 {
-  if (qf->greedy) {
-    if (qf->lower == 0) {
-      if (qf->upper == 1) return 0;
-      else if (IS_REPEAT_INFINITE(qf->upper)) return 1;
+  if (q->greedy) {
+    if (q->lower == 0) {
+      if (q->upper == 1) return 0;
+      else if (IS_REPEAT_INFINITE(q->upper)) return 1;
     }
-    else if (qf->lower == 1) {
-      if (IS_REPEAT_INFINITE(qf->upper)) return 2;
+    else if (q->lower == 1) {
+      if (IS_REPEAT_INFINITE(q->upper)) return 2;
     }
   }
   else {
-    if (qf->lower == 0) {
-      if (qf->upper == 1) return 3;
-      else if (IS_REPEAT_INFINITE(qf->upper)) return 4;
+    if (q->lower == 0) {
+      if (q->upper == 1) return 3;
+      else if (IS_REPEAT_INFINITE(q->upper)) return 4;
     }
-    else if (qf->lower == 1) {
-      if (IS_REPEAT_INFINITE(qf->upper)) return 5;
+    else if (q->lower == 1) {
+      if (IS_REPEAT_INFINITE(q->upper)) return 5;
     }
   }
   return -1;
@@ -2166,16 +2191,17 @@ extern void
 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
 {
   int pnum, cnum;
-  QuantifierNode *p, *c;
+  QtfrNode *p, *c;
 
-  p = &(NQUANTIFIER(pnode));
-  c = &(NQUANTIFIER(cnode));
+  p = NQTFR(pnode);
+  c = NQTFR(cnode);
   pnum = popular_quantifier_num(p);
   cnum = popular_quantifier_num(c);
+  if (pnum < 0 || cnum < 0) return ;
 
   switch(ReduceTypeTable[cnum][pnum]) {
   case RQ_DEL:
-    *p = *c;
+    *pnode = *cnode;
     break;
   case RQ_A:
     p->target = c->target;
@@ -2262,7 +2288,7 @@ typedef struct {
       int  ref1;
       int* refs;
       int  by_name;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
       int  exist_level;
       int  level;   /* \k<name+n> */
 #endif
@@ -2270,8 +2296,10 @@ typedef struct {
     struct {
       UChar* name;
       UChar* name_end;
+      int    gnum;
     } call;
     struct {
+      int ctype;
       int not;
     } prop;
   } u;
@@ -2346,7 +2374,7 @@ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
   if (PEND) goto invalid;
   PFETCH(c);
   if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
-    if (c != MC_ESC(enc)) goto invalid;
+    if (c != MC_ESC(env->syntax)) goto invalid;
     PFETCH(c);
   }
   if (c != '}') goto invalid;
@@ -2389,7 +2417,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
       if (c != '-') return ONIGERR_META_CODE_SYNTAX;
       if (PEND) return ONIGERR_END_PATTERN_AT_META;
       PFETCH(c);
-      if (c == MC_ESC(enc)) {
+      if (c == MC_ESC(env->syntax)) {
        v = fetch_escaped_value(&p, end, env);
        if (v < 0) return v;
         c = (OnigCodePoint )v;
@@ -2419,7 +2447,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
        c = 0177;
       }
       else {
-        if (c == MC_ESC(enc)) {
+        if (c == MC_ESC(env->syntax)) {
           v = fetch_escaped_value(&p, end, env);
           if (v < 0) return v;
           c = (OnigCodePoint )v;
@@ -2444,23 +2472,47 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
 
 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
 
+static OnigCodePoint
+get_name_end_code_point(OnigCodePoint start)
+{
+  switch (start) {
+  case '<':  return (OnigCodePoint )'>'; break;
+  case '\'': return (OnigCodePoint )'\''; break;
+  default:
+    break;
+  }
+
+  return (OnigCodePoint )0;
+}
+
 #ifdef USE_NAMED_GROUP
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
 /*
    \k<name+n>, \k<name-n>
+   \k<num+n>,  \k<num-n>
+   \k<-num+n>, \k<-num-n>
 */
 static int
-fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
-                     , ScanEnv* env, int* level)
+fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
+                     UChar** rname_end, ScanEnv* env,
+                     int* rback_num, int* rlevel)
 {
-  int r, exist_level = 0;
+  int r, sign, is_num, exist_level;
+  OnigCodePoint end_code;
   OnigCodePoint c = 0;
-  OnigCodePoint first_code;
   OnigEncoding enc = env->enc;
   UChar *name_end;
+  UChar *pnum_head;
   UChar *p = *src;
   PFETCH_READY;
 
+  *rback_num = 0;
+  is_num = exist_level = 0;
+  sign = 1;
+  pnum_head = *src;
+
+  end_code = get_name_end_code_point(start_code);
+
   name_end = end;
   r = 0;
   if (PEND) {
@@ -2468,11 +2520,18 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
   }
   else {
     PFETCH(c);
-    first_code = c;
-    if (c == '>')
+    if (c == end_code)
       return ONIGERR_EMPTY_GROUP_NAME;
 
-    if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+      is_num = 1;
+    }
+    else if (c == '-') {
+      is_num = 2;
+      sign = -1;
+      pnum_head = p;
+    }
+    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
     }
   }
@@ -2480,43 +2539,58 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
   while (!PEND) {
     name_end = p;
     PFETCH(c);
-    if (c == '>' || c == ')' || c == '+' || c == '-') break;
+    if (c == end_code || c == ')' || c == '+' || c == '-') {
+      if (is_num == 2)         r = ONIGERR_INVALID_GROUP_NAME;
+      break;
+    }
 
-    if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+    if (is_num != 0) {
+      if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+       is_num = 1;
+      }
+      else {
+       r = ONIGERR_INVALID_GROUP_NAME;
+       is_num = 0;
+      }
+    }
+    else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
     }
   }
 
-  if (c != '>') {
+  if (r == 0 && c != end_code) {
     if (c == '+' || c == '-') {
-      int num;
+      int level;
       int flag = (c == '-' ? -1 : 1);
 
       PFETCH(c);
       if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
       PUNFETCH;
-      num = onig_scan_unsigned_number(&p, end, enc);
-      if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
-      *level = (num * flag);
+      level = onig_scan_unsigned_number(&p, end, enc);
+      if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
+      *rlevel = (level * flag);
       exist_level = 1;
 
       PFETCH(c);
-      if (c == '>')
-       goto first_check;
+      if (c == end_code)
+       goto end;
     }
 
   err:
     r = ONIGERR_INVALID_GROUP_NAME;
     name_end = end;
   }
-  else {
-  first_check:
-    if (ONIGENC_IS_CODE_ASCII(first_code) &&
-        ONIGENC_IS_CODE_UPPER(enc, first_code))
-      r = ONIGERR_INVALID_GROUP_NAME;
-  }
 
+ end:
   if (r == 0) {
+    if (is_num != 0) {
+      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+      else if (*rback_num == 0) goto err;
+
+      *rback_num *= sign;
+    }
+
     *rname_end = name_end;
     *src = p;
     return (exist_level ? 1 : 0);
@@ -2526,33 +2600,40 @@ fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
     return r;
   }
 }
-#endif /* USE_BACKREF_AT_LEVEL */
+#endif /* USE_BACKREF_WITH_LEVEL */
 
 /*
   def: 0 -> define name    (don't allow number name)
        1 -> reference name (allow number name)
 */
 static int
-fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+          UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
 {
-  int r, is_num;
+  int r, is_num, sign;
+  OnigCodePoint end_code;
   OnigCodePoint c = 0;
-  OnigCodePoint first_code;
   OnigEncoding enc = env->enc;
   UChar *name_end;
+  UChar *pnum_head;
   UChar *p = *src;
   PFETCH_READY;
 
+  *rback_num = 0;
+
+  end_code = get_name_end_code_point(start_code);
+
   name_end = end;
+  pnum_head = *src;
   r = 0;
   is_num = 0;
+  sign = 1;
   if (PEND) {
     return ONIGERR_EMPTY_GROUP_NAME;
   }
   else {
     PFETCH(c);
-    first_code = c;
-    if (c == '>')
+    if (c == end_code)
       return ONIGERR_EMPTY_GROUP_NAME;
 
     if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
@@ -2560,6 +2641,18 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
        is_num = 1;
       else {
        r = ONIGERR_INVALID_GROUP_NAME;
+       is_num = 0;
+      }
+    }
+    else if (c == '-') {
+      if (ref == 1) {
+       is_num = 2;
+       sign = -1;
+       pnum_head = p;
+      }
+      else {
+       r = ONIGERR_INVALID_GROUP_NAME;
+       is_num = 0;     
       }
     }
     else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@@ -2567,74 +2660,137 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
     }
   }
 
-  while (!PEND) {
-    name_end = p;
-    PFETCH(c);
-    if (c == '>' || c == ')') break;
+  if (r == 0) {
+    while (!PEND) {
+      name_end = p;
+      PFETCH(c);
+      if (c == end_code || c == ')') {
+       if (is_num == 2)        r = ONIGERR_INVALID_GROUP_NAME;
+       break;
+      }
 
-    if (is_num == 1) {
-      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
-       if (!ONIGENC_IS_CODE_WORD(enc, c))
-         r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
-       else
-         r = ONIGERR_INVALID_GROUP_NAME;
+      if (is_num != 0) {
+       if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+         is_num = 1;
+       }
+       else {
+         if (!ONIGENC_IS_CODE_WORD(enc, c))
+           r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+         else
+           r = ONIGERR_INVALID_GROUP_NAME;
+
+         is_num = 0;
+       }
       }
-    }
-    else {
-      if (!ONIGENC_IS_CODE_WORD(enc, c)) {
-        r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+      else {
+       if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+         r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+       }
       }
     }
-  }
 
-  if (c != '>') {
-    r = ONIGERR_INVALID_GROUP_NAME;
-    name_end = end;
-  }
-  else {
-    if (ONIGENC_IS_CODE_ASCII(first_code) &&
-        ONIGENC_IS_CODE_UPPER(enc, first_code))
+    if (c != end_code) {
       r = ONIGERR_INVALID_GROUP_NAME;
-  }
+      name_end = end;
+    }
+
+    if (is_num != 0) {
+      *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+      if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+      else if (*rback_num == 0) {
+       r = ONIGERR_INVALID_GROUP_NAME;
+       goto err;
+      }
+
+      *rback_num *= sign;
+    }
 
-  if (r == 0) {
     *rname_end = name_end;
     *src = p;
     return 0;
   }
   else {
+    while (!PEND) {
+      name_end = p;
+      PFETCH(c);
+      if (c == end_code || c == ')')
+       break;
+    }
+    if (PEND)
+      name_end = end;
+
+  err:
     onig_scan_env_set_error_string(env, r, *src, name_end);
     return r;
   }
 }
 #else
 static int
-fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
+fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
+          UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
 {
-  int r, len;
+  int r, is_num, sign;
+  OnigCodePoint end_code;
   OnigCodePoint c = 0;
   UChar *name_end;
   OnigEncoding enc = env->enc;
+  UChar *pnum_head;
   UChar *p = *src;
   PFETCH_READY;
 
+  *rback_num = 0;
+
+  end_code = get_name_end_code_point(start_code);
+
+  *rname_end = name_end = end;
   r = 0;
+  pnum_head = *src;
+  is_num = 0;
+  sign = 1;
+
+  if (PEND) {
+    return ONIGERR_EMPTY_GROUP_NAME;
+  }
+  else {
+    PFETCH(c);
+    if (c == end_code)
+      return ONIGERR_EMPTY_GROUP_NAME;
+
+    if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+      is_num = 1;
+    }
+    else if (c == '-') {
+      is_num = 2;
+      sign = -1;
+      pnum_head = p;
+    }
+    else {
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+    }
+  }
+
   while (!PEND) {
     name_end = p;
-    if (enc_len(enc, p) > 1)
-      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
 
     PFETCH(c);
-    if (c == '>' || c == ')') break;
+    if (c == end_code || c == ')') break;
     if (! ONIGENC_IS_CODE_DIGIT(enc, c))
       r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
   }
-  if (c != '>') {
+  if (r == 0 && c != end_code) {
     r = ONIGERR_INVALID_GROUP_NAME;
     name_end = end;
   }
 
   if (r == 0) {
+    *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+    if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+    else if (*rback_num == 0) {
+      r = ONIGERR_INVALID_GROUP_NAME;
+      goto err;
+    }
+    *rback_num *= sign;
+
     *rname_end = name_end;
     *src = p;
     return 0;
@@ -2645,7 +2801,7 @@ fetch_name(UChar** src, UChar* end, UChar** rname_end, ScanEnv* env, int ref)
     return r;
   }
 }
-#endif
+#endif /* USE_NAMED_GROUP */
 
 static void
 CC_ESC_WARN(ScanEnv* env, UChar *c)
@@ -2663,7 +2819,7 @@ CC_ESC_WARN(ScanEnv* env, UChar *c)
 }
 
 static void
-CCEND_ESC_WARN(ScanEnv* env, UChar* c)
+CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
 {
   if (onig_warn == onig_null_warn) return ;
 
@@ -2687,12 +2843,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
   
   while (p < to) {
     x = ONIGENC_MBC_TO_CODE(enc, p, to);
-    q = p + enc_len(enc, p);
+    q = p + enclen(enc, p);
     if (x == s[0]) {
       for (i = 1; i < n && q < to; i++) {
        x = ONIGENC_MBC_TO_CODE(enc, q, to);
        if (x != s[i]) break;
-       q += enc_len(enc, q);
+       q += enclen(enc, q);
       }
       if (i >= n) {
        if (IS_NOT_NULL(next))
@@ -2707,7 +2863,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
 
 static int
 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
-                        OnigCodePoint bad, OnigEncoding enc)
+                OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
 {
   int i, in_esc;
   OnigCodePoint x;
@@ -2718,24 +2874,24 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
   while (p < to) {
     if (in_esc) {
       in_esc = 0;
-      p += enc_len(enc, p);
+      p += enclen(enc, p);
     }
     else {
       x = ONIGENC_MBC_TO_CODE(enc, p, to);
-      q = p + enc_len(enc, p);
+      q = p + enclen(enc, p);
       if (x == s[0]) {
        for (i = 1; i < n && q < to; i++) {
          x = ONIGENC_MBC_TO_CODE(enc, q, to);
          if (x != s[i]) break;
-         q += enc_len(enc, q);
+         q += enclen(enc, q);
        }
        if (i >= n) return 1;
-       p += enc_len(enc, p);
+       p += enclen(enc, p);
       }
       else {
        x = ONIGENC_MBC_TO_CODE(enc, p, to);
        if (x == bad) return 0;
-       else if (x == MC_ESC(enc)) in_esc = 1;
+       else if (x == MC_ESC(syn)) in_esc = 1;
        p = q;
       }
     }
@@ -2771,7 +2927,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
   else if (c == '-') {
     tok->type = TK_CC_RANGE;
   }
-  else if (c == MC_ESC(enc)) {
+  else if (c == MC_ESC(syn)) {
     if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
       goto end;
 
@@ -2783,37 +2939,45 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     switch (c) {
     case 'w':
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.not   = 0;
       break;
     case 'W':
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.not   = 1;
       break;
     case 'd':
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.not   = 0;
       break;
     case 'D':
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.not   = 1;
       break;
     case 's':
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_WHITE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.not   = 0;
       break;
     case 'S':
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.not   = 1;
       break;
     case 'h':
       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_XDIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+      tok->u.prop.not   = 0;
       break;
     case 'H':
       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_XDIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+      tok->u.prop.not   = 1;
       break;
 
     case 'p':
@@ -2850,7 +3014,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
         }
 
-       if (p > prev + enc_len(enc, prev) && !PEND && (PPEEK_IS('}'))) {
+       if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
          PINC;
          tok->type   = TK_CODE_POINT;
          tok->base   = 16;
@@ -2922,7 +3086,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
       tok->backp = p; /* point at '[' is readed */
       PINC;
       if (str_exist_check_with_esc(send, 2, p, end,
-                                   (OnigCodePoint )']', enc)) {
+                                   (OnigCodePoint )']', enc, syn)) {
        tok->type = TK_POSIX_BRACKET_OPEN;
       }
       else {
@@ -2975,7 +3139,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
   tok->backp = p;
 
   PFETCH(c);
-  if (IS_MC_ESC_CODE(c, enc, syn)) {
+  if (IS_MC_ESC_CODE(c, syn)) {
     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
 
     tok->backp = p;
@@ -3062,13 +3226,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     case 'w':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.not   = 0;
       break;
 
     case 'W':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_WORD;
+      tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
+      tok->u.prop.not   = 1;
       break;
 
     case 'b':
@@ -3100,37 +3266,43 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     case 's':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_WHITE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.not   = 0;
       break;
 
     case 'S':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_WHITE_SPACE;
+      tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
+      tok->u.prop.not   = 1;
       break;
 
     case 'd':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.not   = 0;
       break;
 
     case 'D':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_DIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
+      tok->u.prop.not   = 1;
       break;
 
     case 'h':
       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_XDIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+      tok->u.prop.not   = 0;
       break;
 
     case 'H':
       if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
       tok->type = TK_CHAR_TYPE;
-      tok->u.subtype = CTYPE_NOT_XDIGIT;
+      tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
+      tok->u.prop.not   = 1;
       break;
 
     case 'A':
@@ -3182,7 +3354,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
             return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
         }
 
-       if ((p > prev + enc_len(enc, prev)) && !PEND && PPEEK_IS('}')) {
+       if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
          PINC;
          tok->type   = TK_CODE_POINT;
          tok->u.code = (OnigCodePoint )num;
@@ -3240,7 +3412,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
        tok->u.backref.num     = 1;
        tok->u.backref.ref1    = num;
        tok->u.backref.by_name = 0;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
        tok->u.backref.exist_level = 0;
 #endif
        break;
@@ -3276,46 +3448,67 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     case 'k':
       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
        PFETCH(c);
-       if (c == '<') {
+       if (c == '<' || c == '\'') {
          UChar* name_end;
          int* backs;
+         int back_num;
 
          prev = p;
 
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
          name_end = NULL_UCHARP; /* no need. escape gcc warning. */
-         r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
+         r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
+                                   env, &back_num, &tok->u.backref.level);
          if (r == 1) tok->u.backref.exist_level = 1;
          else        tok->u.backref.exist_level = 0;
 #else
-         r = fetch_name(&p, end, &name_end, env, 1);
+         r = fetch_name(&p, end, &name_end, env, &back_num, 1);
 #endif
          if (r < 0) return r;
 
-         num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
-         if (num <= 0) {
-           onig_scan_env_set_error_string(env,
-                           ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
-           return ONIGERR_UNDEFINED_NAME_REFERENCE;
-         }
-         if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
-           int i;
-           for (i = 0; i < num; i++) {
-             if (backs[i] > env->num_mem ||
-                 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+         if (back_num != 0) {
+           if (back_num < 0) {
+             back_num = BACKREF_REL_TO_ABS(back_num, env);
+             if (back_num <= 0)
                return ONIGERR_INVALID_BACKREF;
            }
-         }
 
-         tok->type = TK_BACKREF;
-         tok->u.backref.by_name = 1;
-         if (num == 1) {
+           if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+             if (back_num > env->num_mem ||
+                 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
+               return ONIGERR_INVALID_BACKREF;
+           }
+           tok->type = TK_BACKREF;
+           tok->u.backref.by_name = 0;
            tok->u.backref.num  = 1;
-           tok->u.backref.ref1 = backs[0];
+           tok->u.backref.ref1 = back_num;
          }
          else {
-           tok->u.backref.num  = num;
-           tok->u.backref.refs = backs;
+           num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+           if (num <= 0) {
+             onig_scan_env_set_error_string(env,
+                            ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+             return ONIGERR_UNDEFINED_NAME_REFERENCE;
+           }
+           if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+             int i;
+             for (i = 0; i < num; i++) {
+               if (backs[i] > env->num_mem ||
+                   IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+                 return ONIGERR_INVALID_BACKREF;
+             }
+           }
+
+           tok->type = TK_BACKREF;
+           tok->u.backref.by_name = 1;
+           if (num == 1) {
+             tok->u.backref.num  = 1;
+             tok->u.backref.ref1 = backs[0];
+           }
+           else {
+             tok->u.backref.num  = num;
+             tok->u.backref.refs = backs;
+           }
          }
        }
        else
@@ -3328,16 +3521,18 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     case 'g':
       if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
        PFETCH(c);
-       if (c == '<') {
+       if (c == '<' || c == '\'') {
+         int gnum;
          UChar* name_end;
 
          prev = p;
-         r = fetch_name(&p, end, &name_end, env, 1);
+         r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
          if (r < 0) return r;
 
          tok->type = TK_CALL;
          tok->u.call.name     = prev;
          tok->u.call.name_end = name_end;
+         tok->u.call.gnum     = gnum;
        }
        else
          PUNFETCH;
@@ -3380,7 +3575,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
        tok->u.code = (OnigCodePoint )num;
       }
       else { /* string */
-       p = tok->backp + enc_len(enc, tok->backp);
+       p = tok->backp + enclen(enc, tok->backp);
       }
       break;
     }
@@ -3392,15 +3587,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 #ifdef USE_VARIABLE_META_CHARS
     if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
        IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
-      if (c == MC_ANYCHAR(enc))
+      if (c == MC_ANYCHAR(syn))
        goto any_char;
-      else if (c == MC_ANYTIME(enc))
+      else if (c == MC_ANYTIME(syn))
        goto anytime;
-      else if (c == MC_ZERO_OR_ONE_TIME(enc))
+      else if (c == MC_ZERO_OR_ONE_TIME(syn))
        goto zero_or_one_time;
-      else if (c == MC_ONE_OR_MORE_TIME(enc))
+      else if (c == MC_ONE_OR_MORE_TIME(syn))
        goto one_or_more_time;
-      else if (c == MC_ANYCHAR_ANYTIME(enc)) {
+      else if (c == MC_ANYCHAR_ANYTIME(syn)) {
        tok->type = TK_ANYCHAR_ANYTIME;
        goto out;
       }
@@ -3477,7 +3672,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
           while (1) {
             if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
             PFETCH(c);
-            if (c == MC_ESC(enc)) {
+            if (c == MC_ESC(syn)) {
               if (!PEND) PFETCH(c);
             }
             else {
@@ -3519,7 +3714,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 
     case ']':
       if (*src > env->pattern)   /* /].../ is allowed. */
-       CCEND_ESC_WARN(env, (UChar* )"]");
+       CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
       break;
 
     case '#':
@@ -3553,24 +3748,36 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 }
 
 static int
-add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
-                         const OnigCodePoint sbr[], const OnigCodePoint mbr[])
+add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
+                        OnigEncoding enc ARG_UNUSED,
+                         OnigCodePoint sb_out, const OnigCodePoint mbr[])
 {
   int i, r;
   OnigCodePoint j;
 
-  int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
-  int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
+  int n = ONIGENC_CODE_RANGE_NUM(mbr);
 
   if (not == 0) {
-    for (i = 0; i < nsb; i++) {
-      for (j  = ONIGENC_CODE_RANGE_FROM(sbr, i);
-           j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+    for (i = 0; i < n; i++) {
+      for (j  = ONIGENC_CODE_RANGE_FROM(mbr, i);
+           j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
+       if (j >= sb_out) {
+         if (j == ONIGENC_CODE_RANGE_TO(mbr, i)) i++;
+         else if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+           r = add_code_range_to_buf(&(cc->mbuf), j,
+                                     ONIGENC_CODE_RANGE_TO(mbr, i));
+           if (r != 0) return r;
+           i++;
+         }
+
+         goto sb_end;
+       }
         BITSET_SET_BIT(cc->bs, j);
       }
     }
 
-    for (i = 0; i < nmb; i++) {
+  sb_end:
+    for ( ; i < n; i++) {
       r = add_code_range_to_buf(&(cc->mbuf),
                                 ONIGENC_CODE_RANGE_FROM(mbr, i),
                                 ONIGENC_CODE_RANGE_TO(mbr, i));
@@ -3580,24 +3787,24 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
   else {
     OnigCodePoint prev = 0;
 
-    if (ONIGENC_MBC_MINLEN(enc) == 1) {
-      for (i = 0; i < nsb; i++) {
-        for (j = prev;
-             j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
-          BITSET_SET_BIT(cc->bs, j);
-        }
-        prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
-      }
-      if (prev < 0x7f) {
-        for (j = prev; j < 0x7f; j++) {
-          BITSET_SET_BIT(cc->bs, j);
-        }
+    for (i = 0; i < n; i++) {
+      for (j = prev;
+          j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
+       if (j >= sb_out) {
+         goto sb_end2;
+       }
+       BITSET_SET_BIT(cc->bs, j);
       }
-
-      prev = 0x80;
+      prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
+    }
+    for (j = prev; j < sb_out; j++) {
+      BITSET_SET_BIT(cc->bs, j);
     }
 
-    for (i = 0; i < nmb; i++) {
+  sb_end2:
+    prev = sb_out;
+
+    for (i = 0; i < n; i++) {
       if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
        r = add_code_range_to_buf(&(cc->mbuf), prev,
                                   ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
@@ -3618,12 +3825,13 @@ static int
 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
 {
   int c, r;
-  const OnigCodePoint *sbr, *mbr;
+  const OnigCodePoint *ranges;
+  OnigCodePoint sb_out;
   OnigEncoding enc = env->enc;
 
-  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
+  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
   if (r == 0) {
-    return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
+    return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
   }
   else if (r != ONIG_NO_SUPPORT_CONFIG) {
     return r;
@@ -3677,13 +3885,13 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
   case ONIGENC_CTYPE_WORD:
     if (not == 0) {
       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
-       if (ONIGENC_IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
+       if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
       }
       ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
     }
     else {
       for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
-        if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)  /* 0: invalid code point */
+        if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
            && ! ONIGENC_IS_CODE_WORD(enc, c))
          BITSET_SET_BIT(cc->bs, c);
       }
@@ -3698,62 +3906,11 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
   return r;
 }
 
-static int
-parse_ctype_to_enc_ctype(int pctype, int* not)
-{
-  int ctype;
-
-  switch (pctype) {
-  case CTYPE_WORD:
-    ctype = ONIGENC_CTYPE_WORD;
-    *not = 0;
-    break;
-  case CTYPE_NOT_WORD:
-    ctype = ONIGENC_CTYPE_WORD;
-    *not = 1;
-    break;
-  case CTYPE_WHITE_SPACE:
-    ctype = ONIGENC_CTYPE_SPACE;
-    *not = 0;
-    break;
-  case CTYPE_NOT_WHITE_SPACE:
-    ctype = ONIGENC_CTYPE_SPACE;
-    *not = 1;
-    break;
-  case CTYPE_DIGIT:
-    ctype = ONIGENC_CTYPE_DIGIT;
-    *not = 0;
-    break;
-  case CTYPE_NOT_DIGIT:
-    ctype = ONIGENC_CTYPE_DIGIT;
-    *not = 1;
-    break;
-  case CTYPE_XDIGIT:
-    ctype = ONIGENC_CTYPE_XDIGIT;
-    *not = 0;
-    break;
-  case CTYPE_NOT_XDIGIT:
-    ctype = ONIGENC_CTYPE_XDIGIT;
-    *not = 1;
-    break;
-  default:
-    return ONIGERR_PARSER_BUG;
-    break;
-  }
-  return ctype;
-}
-
-typedef struct {
-  UChar    *name;
-  int       ctype;
-  short int len;
-} PosixBracketEntryType;
-
 static int
 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
 {
 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH  20
-#define POSIX_BRACKET_NAME_MAX_LEN         6
+#define POSIX_BRACKET_NAME_MIN_LEN         4
 
   static PosixBracketEntryType PBS[] = {
     { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
@@ -3769,7 +3926,8 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
     { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
     { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
     { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
-    { (UChar* )NULL, -1, 0 }
+    { (UChar* )"word",   ONIGENC_CTYPE_WORD,   4 },
+    { (UChar* )NULL,     -1, 0 }
   };
 
   PosixBracketEntryType *pb;
@@ -3786,7 +3944,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
   else
     not = 0;
 
-  if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MAX_LEN + 2)
+  if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
     goto not_posix_bracket;
 
   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
@@ -3820,86 +3978,39 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
     }
   }
 
-  return 1;   /* 1: is not POSIX bracket, but no error. */
-}
-
-static int
-property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
-{
-  static PosixBracketEntryType PBS[] = {
-    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
-    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
-    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
-    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
-    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
-    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
-    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
-    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
-    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
-    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
-    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
-    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
-    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
-    { (UChar* )NULL, -1, 0 }
-  };
-
-  PosixBracketEntryType *pb;
-  int len;
-
-  len = onigenc_strlen(enc, p, end);
-  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
-    if (len == pb->len &&
-        onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
-      return pb->ctype;
-  }
-
-  return -1;
+  return 1;  /* 1: is not POSIX bracket, but no error. */
 }
 
 static int
 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
 {
-  int ctype;
+  int r;
   OnigCodePoint c;
   OnigEncoding enc = env->enc;
   UChar *prev, *start, *p = *src;
   PFETCH_READY;
 
-  /* 'IsXXXX' => 'XXXX' */
-  if (!PEND &&
-      IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS)) {
-    c = PPEEK;
-    if (c == 'I') {
-      PINC;
-      if (! PEND) {
-       c = PPEEK;
-       if (c == 's')
-         PINC;
-       else
-         PUNFETCH;
-      }
-    }
-  }
-
+  r = 0;
   start = prev = p;
 
   while (!PEND) {
     prev = p;
     PFETCH(c);
     if (c == '}') {
-      ctype = property_name_to_ctype(start, prev, enc);
-      if (ctype < 0) break;
+      r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
+      if (r < 0) break;
 
       *src = p;
-      return ctype;
+      return r;
     }
-    else if (c == '(' || c == ')' || c == '{' || c == '|')
+    else if (c == '(' || c == ')' || c == '{' || c == '|') {
+      r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
       break;
+    }
   }
 
-  onig_scan_env_set_error_string(env, ONIGERR_INVALID_CHAR_PROPERTY_NAME,
-                                *src, prev);
-  return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
+  onig_scan_env_set_error_string(env, r, *src, prev);
+  return r;
 }
 
 static int
@@ -3913,11 +4024,11 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
   if (ctype < 0) return ctype;
 
   *np = node_new_cclass();
-  CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
-  cc = &(NCCLASS(*np));
+  CHECK_NULL_RETURN_MEMERR(*np);
+  cc = NCCLASS(*np);
   r = add_ctype_to_cc(cc, ctype, 0, env);
   if (r != 0) return r;
-  if (tok->u.prop.not != 0) CCLASS_SET_NOT(cc);
+  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
 
   return 0;
 }
@@ -3981,7 +4092,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
     if (intype == *type) {
       if (intype == CCV_SB) {
         if (*vs > 0xff || v > 0xff)
-          return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+          return ONIGERR_INVALID_CODE_POINT_VALUE;
 
        if (*vs > v) {
          if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
@@ -4036,10 +4147,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
 
 static int
 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
-                OnigEncoding enc)
+                ScanEnv* env)
 {
   int in_esc;
   OnigCodePoint code;
+  OnigEncoding enc = env->enc;
   UChar* p = from;
   PFETCH_READY;
 
@@ -4051,7 +4163,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
     else {
       PFETCH(code);
       if (code == c) return 1;
-      if (code == MC_ESC(enc)) in_esc = 1;
+      if (code == MC_ESC(env->syntax)) in_esc = 1;
     }
   }
   return 0;
@@ -4086,7 +4198,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
   if (r < 0) return r;
   if (r == TK_CC_CLOSE) {
     if (! code_exist_check((OnigCodePoint )']',
-                           *src, env->pattern_end, 1, env->enc))
+                           *src, env->pattern_end, 1, env))
       return ONIGERR_EMPTY_CHAR_CLASS;
 
     CC_ESC_WARN(env, (UChar* )"]");
@@ -4094,8 +4206,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
   }
 
   *np = node = node_new_cclass();
-  CHECK_NULL_RETURN_VAL(node, ONIGERR_MEMORY);
-  cc = &(NCCLASS(node));
+  CHECK_NULL_RETURN_MEMERR(node);
+  cc = NCCLASS(node);
 
   and_start = 0;
   state = CCS_START;
@@ -4108,6 +4220,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
       if (len > 1) {
        in_type = CCV_CODE_POINT;
       }
+      else if (len < 0) {
+       r = len;
+       goto err;
+      }
       else {
       sb_char:
        in_type = CCV_SB;
@@ -4141,7 +4257,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
          goto err;
        }
 
-       len = enc_len(env->enc, buf);
+       len = enclen(env->enc, buf);
        if (i < len) {
          r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
          goto err;
@@ -4202,12 +4318,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
       break;
 
     case TK_CHAR_TYPE:
-      {
-       int ctype, not;
-       ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
-       r = add_ctype_to_cc(cc, ctype, not, env);
-       if (r != 0) return r;
-      }
+      r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
+      if (r != 0) return r;
 
     next_class:
       r = next_state_class(cc, &vs, &val_type, &state, env);
@@ -4287,7 +4399,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
 
        r = parse_char_class(&anode, tok, &p, end, env);
        if (r != 0) goto cc_open_err;
-       acc = &(NCCLASS(anode));
+       acc = NCCLASS(anode);
        r = or_cclass(cc, acc, env->enc);
 
        onig_node_free(anode);
@@ -4352,10 +4464,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
   }
 
   if (neg != 0)
-    CCLASS_SET_NOT(cc);
+    NCCLASS_SET_NOT(cc);
   else
-    CCLASS_CLEAR_NOT(cc);
-  if (IS_CCLASS_NOT(cc) &&
+    NCCLASS_CLEAR_NOT(cc);
+  if (IS_NCCLASS_NOT(cc) &&
       IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
     int is_empty;
 
@@ -4378,7 +4490,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
   return 0;
 
  err:
-  if (cc != &(NCCLASS(*np)))
+  if (cc != NCCLASS(*np))
     bbuf_free(cc->mbuf);
   onig_node_free(*np);
   return r;
@@ -4388,15 +4500,19 @@ static int parse_subexp(Node** top, OnigToken* tok, int term,
                        UChar** src, UChar* end, ScanEnv* env);
 
 static int
-parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
-            ScanEnv* env)
+parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+             ScanEnv* env)
 {
   int r, num;
-  int list_capture;
   Node *target;
   OnigOptionType option;
-  OnigEncoding enc = env->enc;
   OnigCodePoint c;
+  OnigEncoding enc = env->enc;
+
+#ifdef USE_NAMED_GROUP
+  int list_capture;
+#endif
+
   UChar* p = *src;
   PFETCH_READY;
 
@@ -4428,9 +4544,19 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
       *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
       break;
     case '>':            /* (?>...) stop backtrack */
-      *np = node_new_effect(EFFECT_STOP_BACKTRACK);
+      *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
       break;
 
+#ifdef USE_NAMED_GROUP
+    case '\'':
+      if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+       goto named_group1;
+      }
+      else
+       return ONIGERR_UNDEFINED_GROUP_OPTION;
+      break;
+#endif
+
     case '<':   /* look behind (?<=...), (?<!...) */
       PFETCH(c);
       if (c == '=')
@@ -4438,35 +4564,45 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
       else if (c == '!')
        *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
 #ifdef USE_NAMED_GROUP
-      else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
-       UChar *name;
-       UChar *name_end;
+      else {
+       if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+         UChar *name;
+         UChar *name_end;
 
-       PUNFETCH;
-       list_capture = 0;
+         PUNFETCH;
+         c = '<';
 
-      named_group:
-       name = p;
-       r = fetch_name(&p, end, &name_end, env, 0);
-       if (r < 0) return r;
+       named_group1:
+         list_capture = 0;
 
-       num = scan_env_add_mem_entry(env);
-       if (num < 0) return num;
-       if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
-         return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+       named_group2:
+         name = p;
+         r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
+         if (r < 0) return r;
 
-       r = name_add(env->reg, name, name_end, num, env);
-       if (r != 0) return r;
-       *np = node_new_effect_memory(env->option, 1);
-       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
-       NEFFECT(*np).regnum = num;
-       if (list_capture != 0)
-         BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
-       env->num_named++;
+         num = scan_env_add_mem_entry(env);
+         if (num < 0) return num;
+         if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
+           return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
+
+         r = name_add(env->reg, name, name_end, num, env);
+         if (r != 0) return r;
+         *np = node_new_enclose_memory(env->option, 1);
+         CHECK_NULL_RETURN_MEMERR(*np);
+         NENCLOSE(*np)->regnum = num;
+         if (list_capture != 0)
+           BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+         env->num_named++;
+       }
+       else {
+         return ONIGERR_UNDEFINED_GROUP_OPTION;
+       }
       }
-#endif
-      else
+#else
+      else {
        return ONIGERR_UNDEFINED_GROUP_OPTION;
+      }
+#endif
       break;
 
     case '@':
@@ -4474,25 +4610,25 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
 #ifdef USE_NAMED_GROUP
        if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
          PFETCH(c);
-         if (c == '<') {
+         if (c == '<' || c == '\'') {
            list_capture = 1;
-           goto named_group; /* (?@<name>...) */
+           goto named_group2; /* (?@<name>...) */
          }
          PUNFETCH;
        }
 #endif
-       *np = node_new_effect_memory(env->option, 0);
-       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+       *np = node_new_enclose_memory(env->option, 0);
+       CHECK_NULL_RETURN_MEMERR(*np);
        num = scan_env_add_mem_entry(env);
        if (num < 0) {
          onig_node_free(*np);
          return num;
        }
-       else if (num >= BIT_STATUS_BITS_NUM) {
+       else if (num >= (int )BIT_STATUS_BITS_NUM) {
          onig_node_free(*np);
          return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
        }
-       NEFFECT(*np).regnum = num;
+       NENCLOSE(*np)->regnum = num;
        BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
       }
       else {
@@ -4545,7 +4681,7 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
 
          if (c == ')') {
            *np = node_new_option(option);
-           CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+           CHECK_NULL_RETURN_MEMERR(*np);
            *src = p;
            return 2; /* option only */
          }
@@ -4559,8 +4695,8 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
            env->option = prev;
            if (r < 0) return r;
            *np = node_new_option(option);
-           CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
-           NEFFECT(*np).target = target;
+           CHECK_NULL_RETURN_MEMERR(*np);
+           NENCLOSE(*np)->target = target;
            *src = p;
            return 0;
          }
@@ -4579,26 +4715,26 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
       goto group;
 
-    *np = node_new_effect_memory(env->option, 0);
-    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    *np = node_new_enclose_memory(env->option, 0);
+    CHECK_NULL_RETURN_MEMERR(*np);
     num = scan_env_add_mem_entry(env);
     if (num < 0) return num;
-    NEFFECT(*np).regnum = num;
+    NENCLOSE(*np)->regnum = num;
   }
 
-  CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+  CHECK_NULL_RETURN_MEMERR(*np);
   r = fetch_token(tok, &p, end, env);
   if (r < 0) return r;
   r = parse_subexp(&target, tok, term, &p, end, env);
   if (r < 0) return r;
 
-  if (NTYPE(*np) == N_ANCHOR)
-    NANCHOR(*np).target = target;
+  if (NTYPE(*np) == NT_ANCHOR)
+    NANCHOR(*np)->target = target;
   else {
-    NEFFECT(*np).target = target;
-    if (NEFFECT(*np).type == EFFECT_MEMORY) {
+    NENCLOSE(*np)->target = target;
+    if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
       /* Don't move this to previous of parse_subexp() */
-      r = scan_env_set_mem_node(env, NEFFECT(*np).regnum, *np);
+      r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
       if (r != 0) return r;
     }
   }
@@ -4618,17 +4754,17 @@ static const char* ReduceQStr[] = {
 static int
 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
 {
-  QuantifierNode* qn;
+  QtfrNode* qn;
 
-  qn = &(NQUANTIFIER(qnode));
+  qn = NQTFR(qnode);
   if (qn->lower == 1 && qn->upper == 1) {
     return 1;
   }
 
   switch (NTYPE(target)) {
-  case N_STRING:
+  case NT_STR:
     if (! group) {
-      StrNode* sn = &(NSTRING(target));
+      StrNode* sn = NSTR(target);
       if (str_node_can_be_split(sn, env->enc)) {
        Node* n = str_node_split_last_char(sn, env->enc);
        if (IS_NOT_NULL(n)) {
@@ -4639,10 +4775,10 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
     }
     break;
 
-  case N_QUANTIFIER:
+  case NT_QTFR:
     { /* check redundant double repeat. */
       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
-      QuantifierNode* qnt = &(NQUANTIFIER(target));
+      QtfrNode* qnt   = NQTFR(target);
       int nestq_num   = popular_quantifier_num(qn);
       int targetq_num = popular_quantifier_num(qnt);
 
@@ -4705,6 +4841,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
   return 0;
 }
 
+
 #ifdef USE_SHARED_CCLASS_TABLE
 
 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
@@ -4728,17 +4865,17 @@ static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
 static int type_cclass_hash(type_cclass_key* key)
 {
   int i, val;
-  unsigned char *p;
+  UChar *p;
 
   val = 0;
 
-  p = (unsigned char* )&(key->enc);
-  for (i = 0; i < sizeof(key->enc); i++) {
+  p = (UChar* )&(key->enc);
+  for (i = 0; i < (int )sizeof(key->enc); i++) {
     val = val * 997 + (int )*p++;
   }
 
-  p = (unsigned char* )(&key->type);
-  for (i = 0; i < sizeof(key->type); i++) {
+  p = (UChar* )(&key->type);
+  for (i = 0; i < (int )sizeof(key->type); i++) {
     val = val * 997 + (int )*p++;
   }
 
@@ -4755,10 +4892,10 @@ static st_table* OnigTypeCClassTable;
 
 
 static int
-i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
 {
   if (IS_NOT_NULL(node)) {
-    CClassNode* cc = &(NCCLASS(node));
+    CClassNode* cc = NCCLASS(node);
     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
     xfree(node);
   }
@@ -4782,6 +4919,118 @@ onig_free_shared_cclass_table(void)
 #endif /* USE_SHARED_CCLASS_TABLE */
 
 
+#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+/* Rewrite a negated class in positive form and clear its NOT flag; a class
+ * without the flag is left alone.  Returns 0, or the error reported by
+ * not_code_range_buf().  The fallible step runs first so that a failure
+ * leaves cc exactly as it was. */
+static int
+clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
+{
+  BBuf *tbuf;
+  int r;
+
+  if (! IS_NCCLASS_NOT(cc)) return 0;
+
+  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+    r = not_code_range_buf(enc, cc->mbuf, &tbuf);
+    if (r != 0) return r;
+    bbuf_free(cc->mbuf);
+    cc->mbuf = tbuf;
+  }
+
+  bitset_invert(cc->bs);
+  NCCLASS_CLEAR_NOT(cc);
+  return 0;
+}
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+
+typedef struct {
+  ScanEnv*    env;       /* parse environment; its enc is used for all lookups */
+  CClassNode* cc;        /* character class being extended */
+  Node*       alt_root;  /* head of the alternation of multi-char folds, or NULL */
+  Node**      ptail;     /* slot where the next alternation node is linked */
+} IApplyCaseFoldArg;
+
+/* Callback handed to ONIGENC_APPLY_ALL_CASE_FOLD(); arg is an
+ * IApplyCaseFoldArg*.  When `from` is accepted by the class, its fold target
+ * is made to match as well: a single-code target (to_len == 1) is added to
+ * the class itself, a multi-code target becomes an ambiguous string node
+ * appended to the alternation at *ptail.
+ * Returns 0 on success, ONIGERR_MEMORY on allocation failure, or the error
+ * reported by a helper; any non-zero value aborts the enumeration. */
+static int
+i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
+                 int to_len, void* arg)
+{
+  IApplyCaseFoldArg* iarg;
+  ScanEnv* env;
+  CClassNode* cc;
+  BitSetRef bs;
+  int r;
+
+  iarg = (IApplyCaseFoldArg* )arg;
+  env = iarg->env;
+  cc  = iarg->cc;
+  bs = cc->bs;
+
+  if (to_len == 1) {
+    int is_in = onig_is_code_in_cc(env->enc, from, cc);
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+    if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
+       (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
+      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+       r = add_code_range(&(cc->mbuf), env, *to, *to);
+       if (r != 0) return r;
+      }
+      else {
+       BITSET_SET_BIT(bs, *to);
+      }
+    }
+#else
+    if (is_in != 0) {
+      if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+       /* ranges are only added to a class in positive form */
+       if (IS_NCCLASS_NOT(cc)) {
+         r = clear_not_flag_cclass(cc, env->enc);
+         if (r != 0) return r;
+       }
+       r = add_code_range(&(cc->mbuf), env, *to, *to);
+       if (r != 0) return r;
+      }
+      else {
+       if (IS_NCCLASS_NOT(cc)) {
+         /* the bitset of a negated class is inverted when the NOT flag
+            is cleared, so clearing the bit here admits *to. */
+         BITSET_CLEAR_BIT(bs, *to);
+       }
+       else
+         BITSET_SET_BIT(bs, *to);
+      }
+    }
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+  }
+  else {
+    int i, len;
+    UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+    Node *snode = NULL_NODE;
+
+    if (onig_is_code_in_cc(env->enc, from, cc)
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+       && !IS_NCCLASS_NOT(cc)
+#endif
+       ) {
+      for (i = 0; i < to_len; i++) {
+       len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
+       if (i == 0) {
+         snode = onig_node_new_str(buf, buf + len);
+         CHECK_NULL_RETURN_MEMERR(snode);
+
+         /* a multi-char string expanded from a char-class is compared
+            only with the case-folded string at match time. */
+         NSTRING_SET_AMBIG(snode);
+       }
+       else {
+         r = onig_node_str_cat(snode, buf, buf + len);
+         if (r < 0) {
+           onig_node_free(snode);
+           return r;
+         }
+       }
+      }
+
+      *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
+      if (IS_NULL(*(iarg->ptail))) {
+       /* snode is not linked anywhere yet, so release it here */
+       onig_node_free(snode);
+       return ONIGERR_MEMORY;
+      }
+      iarg->ptail = &(NCDR((*(iarg->ptail))));
+    }
+  }
+
+  return 0;
+}
+
 static int
 parse_exp(Node** np, OnigToken* tok, int term,
          UChar** src, UChar* end, ScanEnv* env)
@@ -4791,7 +5040,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
   Node** targetp;
 
   *np = NULL;
-  if (tok->type == term)
+  if (tok->type == (enum TokenSyms )term)
     goto end_of_token;
 
   switch (tok->type) {
@@ -4803,20 +5052,20 @@ parse_exp(Node** np, OnigToken* tok, int term,
   break;
 
   case TK_SUBEXP_OPEN:
-    r = parse_effect(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+    r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
     if (r < 0) return r;
     if (r == 1) group = 1;
     else if (r == 2) { /* option only */
       Node* target;
       OnigOptionType prev = env->option;
 
-      env->option = NEFFECT(*np).option;
+      env->option = NENCLOSE(*np)->option;
       r = fetch_token(tok, src, end, env);
       if (r < 0) return r;
       r = parse_subexp(&target, tok, term, src, end, env);
       env->option = prev;
       if (r < 0) return r;
-      NEFFECT(*np).target = target;    
+      NENCLOSE(*np)->target = target;  
       return tok->type;
     }
     break;
@@ -4833,7 +5082,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
   tk_byte:
     {
       *np = node_new_str(tok->backp, *src);
-      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*np);
 
       while (1) {
        r = fetch_token(tok, src, end, env);
@@ -4853,13 +5102,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
   case TK_RAW_BYTE:
   tk_raw_byte:
     {
-      *np = node_new_str_char((UChar )tok->u.c);
-      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      *np = node_new_str_raw_char((UChar )tok->u.c);
+      CHECK_NULL_RETURN_MEMERR(*np);
       len = 1;
       while (1) {
        if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
-         if (len == enc_len(env->enc, NSTRING(*np).s)) {
+         if (len == enclen(env->enc, NSTR(*np)->s)) {
            r = fetch_token(tok, src, end, env);
+           NSTRING_CLEAR_RAW(*np);
            goto string_end;
          }
        }
@@ -4867,12 +5117,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
        r = fetch_token(tok, src, end, env);
        if (r < 0) return r;
        if (r != TK_RAW_BYTE) {
+         /* Don't use this, it is wrong for little endian encodings. */
 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
          int rem;
          if (len < ONIGENC_MBC_MINLEN(env->enc)) {
            rem = ONIGENC_MBC_MINLEN(env->enc) - len;
-           (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
-           if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
+           (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
+           if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
+             NSTRING_CLEAR_RAW(*np);
              goto string_end;
            }
          }
@@ -4898,7 +5150,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
 #else
       *np = node_new_str(buf, buf + num);
 #endif
-      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*np);
     }
     break;
 
@@ -4907,7 +5159,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
       OnigCodePoint end_op[2];
       UChar *qstart, *qend, *nextp;
 
-      end_op[0] = (OnigCodePoint )MC_ESC(env->enc);
+      end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
       end_op[1] = (OnigCodePoint )'E';
       qstart = *src;
       qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
@@ -4915,35 +5167,31 @@ parse_exp(Node** np, OnigToken* tok, int term,
        nextp = qend = end;
       }
       *np = node_new_str(qstart, qend);
-      CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      CHECK_NULL_RETURN_MEMERR(*np);
       *src = nextp;
     }
     break;
 
   case TK_CHAR_TYPE:
     {
-      switch (tok->u.subtype) {
-      case CTYPE_WORD:
-      case CTYPE_NOT_WORD:
-       *np = node_new_ctype(tok->u.subtype);
-       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+      switch (tok->u.prop.ctype) {
+      case ONIGENC_CTYPE_WORD:
+       *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
+       CHECK_NULL_RETURN_MEMERR(*np);
        break;
 
-      case CTYPE_WHITE_SPACE:
-      case CTYPE_NOT_WHITE_SPACE:
-      case CTYPE_DIGIT:
-      case CTYPE_NOT_DIGIT:
-      case CTYPE_XDIGIT:
-      case CTYPE_NOT_XDIGIT:
+      case ONIGENC_CTYPE_SPACE:
+      case ONIGENC_CTYPE_DIGIT:
+      case ONIGENC_CTYPE_XDIGIT:
        {
          CClassNode* cc;
-         int ctype, not;
 
 #ifdef USE_SHARED_CCLASS_TABLE
-          const OnigCodePoint *sbr, *mbr;
+          const OnigCodePoint *mbr;
+         OnigCodePoint sb_out;
 
-         ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
-          r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
+          r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
+                                          &sb_out, &mbr);
           if (r == 0 &&
               ONIGENC_CODE_RANGE_NUM(mbr)
               >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
@@ -4951,8 +5199,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
             type_cclass_key* new_key;
 
             key.enc  = env->enc;
-            key.not  = not;
-            key.type = ctype;
+            key.not  = tok->u.prop.not;
+            key.type = tok->u.prop.ctype;
 
             THREAD_ATOMIC_START;
 
@@ -4972,14 +5220,17 @@ parse_exp(Node** np, OnigToken* tok, int term,
               }
             }
 
-            *np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
+            *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
+                                                    sb_out, mbr);
             if (IS_NULL(*np)) {
               THREAD_ATOMIC_END;
               return ONIGERR_MEMORY;
             }
 
-            CCLASS_SET_SHARE(&(NCCLASS(*np)));
+            cc = NCCLASS(*np);
+            NCCLASS_SET_SHARE(cc);
             new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+           xmemcpy(new_key, &key, sizeof(type_cclass_key));
             onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
                                (st_data_t )*np);
             
@@ -4987,12 +5238,11 @@ parse_exp(Node** np, OnigToken* tok, int term,
           }
           else {
 #endif
-            ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
             *np = node_new_cclass();
-            CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
-            cc = &(NCCLASS(*np));
-            add_ctype_to_cc(cc, ctype, 0, env);
-            if (not != 0) CCLASS_SET_NOT(cc);
+            CHECK_NULL_RETURN_MEMERR(*np);
+            cc = NCCLASS(*np);
+            add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+            if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
 #ifdef USE_SHARED_CCLASS_TABLE
           }
 #endif
@@ -5018,55 +5268,44 @@ parse_exp(Node** np, OnigToken* tok, int term,
       r = parse_char_class(np, tok, src, end, env);
       if (r != 0) return r;
 
-      cc = &(NCCLASS(*np));
-
+      cc = NCCLASS(*np);
       if (IS_IGNORECASE(env->option)) {
-        int i, n, in_cc;
-        const OnigPairAmbigCodes* ccs;
-        BitSetRef bs = cc->bs;
-        OnigAmbigType amb;
-
-        for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
-          if ((amb & env->ambig_flag) == 0)  continue;
-
-          n = ONIGENC_GET_ALL_PAIR_AMBIG_CODES(env->enc, amb, &ccs);
-          for (i = 0; i < n; i++) {
-            in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
-
-            if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
-                (in_cc == 0 && IS_CCLASS_NOT(cc))) {
-              if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
-                  ccs[i].from >= SINGLE_BYTE_SIZE) {
-                /* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
-                add_code_range(&(cc->mbuf), env, ccs[i].to, ccs[i].to);
-              }
-              else {
-                if (BITSET_AT(bs, ccs[i].from)) {
-                  /* /(?i:[^A-C])/.match("a") ==> fail. */
-                  BITSET_SET_BIT(bs, ccs[i].to);
-                }
-                if (BITSET_AT(bs, ccs[i].to)) {
-                  BITSET_SET_BIT(bs, ccs[i].from);
-                }
-              }
-            }
+       IApplyCaseFoldArg iarg;
+
+       iarg.env      = env;
+       iarg.cc       = cc;
+       iarg.alt_root = NULL_NODE;
+       iarg.ptail    = &(iarg.alt_root);
+
+       r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+                                       i_apply_case_fold, &iarg);
+       if (r != 0) {
+         onig_node_free(iarg.alt_root);
+         return r;
+       }
+       if (IS_NOT_NULL(iarg.alt_root)) {
+          Node* work = onig_node_new_alt(*np, iarg.alt_root);
+          if (IS_NULL(work)) {
+            onig_node_free(iarg.alt_root);
+            return ONIGERR_MEMORY;
           }
-        }
+          *np = work;
+       }
       }
     }
     break;
 
   case TK_ANYCHAR:
     *np = node_new_anychar();
-    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(*np);
     break;
 
   case TK_ANYCHAR_ANYTIME:
     *np = node_new_anychar();
-    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(*np);
     qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
-    CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
-    NQUANTIFIER(qn).target = *np;
+    CHECK_NULL_RETURN_MEMERR(qn);
+    NQTFR(qn)->target = *np;
     *np = qn;
     break;
 
@@ -5075,19 +5314,28 @@ parse_exp(Node** np, OnigToken* tok, int term,
     *np = node_new_backref(len,
                   (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
                           tok->u.backref.by_name,
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
                           tok->u.backref.exist_level,
                           tok->u.backref.level,
 #endif
                           env);
-    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+    CHECK_NULL_RETURN_MEMERR(*np);
     break;
 
 #ifdef USE_SUBEXP_CALL
   case TK_CALL:
-    *np = node_new_call(tok->u.call.name, tok->u.call.name_end);
-    CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
-    env->num_call++;
+    {
+      int gnum = tok->u.call.gnum;
+
+      if (gnum < 0) {
+       gnum = BACKREF_REL_TO_ABS(gnum, env);
+       if (gnum <= 0)
+         return ONIGERR_INVALID_BACKREF;
+      }
+      *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
+      CHECK_NULL_RETURN_MEMERR(*np);
+      env->num_call++;
+    }
     break;
 #endif
 
@@ -5126,31 +5374,46 @@ parse_exp(Node** np, OnigToken* tok, int term,
        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
 
       qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
-                             (r == TK_INTERVAL ? 1 : 0));
-      CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
-      NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
+                              (r == TK_INTERVAL ? 1 : 0));
+      CHECK_NULL_RETURN_MEMERR(qn);
+      NQTFR(qn)->greedy = tok->u.repeat.greedy;
       r = set_quantifier(qn, *targetp, group, env);
-      if (r < 0) return r;
-      
+      if (r < 0) {
+       onig_node_free(qn);
+       return r;
+      }
+
       if (tok->u.repeat.possessive != 0) {
        Node* en;
-       en = node_new_effect(EFFECT_STOP_BACKTRACK);
-       CHECK_NULL_RETURN_VAL(en, ONIGERR_MEMORY);
-       NEFFECT(en).target = qn;
+       en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+       if (IS_NULL(en)) {
+         onig_node_free(qn);
+         return ONIGERR_MEMORY;
+       }
+       NENCLOSE(en)->target = qn;
        qn = en;
       }
 
       if (r == 0) {
        *targetp = qn;
       }
+      else if (r == 1) {
+       onig_node_free(qn);
+      }
       else if (r == 2) { /* split case: /abc+/ */
        Node *tmp;
 
        *targetp = node_new_list(*targetp, NULL);
-       CHECK_NULL_RETURN_VAL(*targetp, ONIGERR_MEMORY);
-       tmp = NCONS(*targetp).right = node_new_list(qn, NULL);
-       CHECK_NULL_RETURN_VAL(tmp, ONIGERR_MEMORY);
-       targetp = &(NCONS(tmp).left);
+       if (IS_NULL(*targetp)) {
+         onig_node_free(qn);
+         return ONIGERR_MEMORY;
+       }
+       tmp = NCDR(*targetp) = node_new_list(qn, NULL);
+       if (IS_NULL(tmp)) {
+         onig_node_free(qn);
+         return ONIGERR_MEMORY;
+       }
+       targetp = &(NCAR(tmp));
       }
       goto re_entry;
     }
@@ -5175,19 +5438,19 @@ parse_branch(Node** top, OnigToken* tok, int term,
   }
   else {
     *top  = node_new_list(node, NULL);
-    headp = &(NCONS(*top).right);
+    headp = &(NCDR(*top));
     while (r != TK_EOT && r != term && r != TK_ALT) {
       r = parse_exp(&node, tok, term, src, end, env);
       if (r < 0) return r;
 
-      if (NTYPE(node) == N_LIST) {
+      if (NTYPE(node) == NT_LIST) {
        *headp = node;
-       while (IS_NOT_NULL(NCONS(node).right)) node = NCONS(node).right;
-       headp = &(NCONS(node).right);
+       while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
+       headp = &(NCDR(node));
       }
       else {
        *headp = node_new_list(node, NULL);
-       headp = &(NCONS(*headp).right);
+       headp = &(NCDR(*headp));
       }
     }
   }
@@ -5214,19 +5477,19 @@ parse_subexp(Node** top, OnigToken* tok, int term,
     *top = node;
   }
   else if (r == TK_ALT) {
-    *top  = node_new_alt(node, NULL);
-    headp = &(NCONS(*top).right);
+    *top  = onig_node_new_alt(node, NULL);
+    headp = &(NCDR(*top));
     while (r == TK_ALT) {
       r = fetch_token(tok, src, end, env);
       if (r < 0) return r;
       r = parse_branch(&node, tok, term, src, end, env);
       if (r < 0) return r;
 
-      *headp = node_new_alt(node, NULL);
-      headp = &(NCONS(*headp).right);
+      *headp = onig_node_new_alt(node, NULL);
+      headp = &(NCDR(*headp));
     }
 
-    if (tok->type != term)
+    if (tok->type != (enum TokenSyms )term)
       goto err;
   }
   else {
@@ -5254,8 +5517,8 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
 }
 
 extern int
-onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg,
-                     ScanEnv* env)
+onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
+                    regex_t* reg, ScanEnv* env)
 {
   int r;
   UChar* p;
@@ -5265,13 +5528,13 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_
 #endif
 
   scan_env_clear(env);
-  env->option      = reg->options;
-  env->ambig_flag  = reg->ambig_flag;
-  env->enc         = reg->enc;
-  env->syntax      = reg->syntax;
-  env->pattern     = (UChar* )pattern;
-  env->pattern_end = (UChar* )end;
-  env->reg         = reg;
+  env->option         = reg->options;
+  env->case_fold_flag = reg->case_fold_flag;
+  env->enc            = reg->enc;
+  env->syntax         = reg->syntax;
+  env->pattern        = (UChar* )pattern;
+  env->pattern_end    = (UChar* )end;
+  env->reg            = reg;
 
   *root = NULL;
   p = (UChar* )pattern;
@@ -5281,7 +5544,7 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_
 }
 
 extern void
-onig_scan_env_set_error_string(ScanEnv* env, int ecode,
+onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
                                UChar* arg, UChar* arg_end)
 {
   env->error     = arg;
index b25618a33f996a12bbea079262dc245b2cd85445..0c5c2c936c0449438ee722a57f6d5e9d6f9b7e5e 100644 (file)
 #include "regint.h"
 
 /* node type */
-#define N_STRING       (1<< 0)
-#define N_CCLASS       (1<< 1)
-#define N_CTYPE        (1<< 2)
-#define N_ANYCHAR      (1<< 3)
-#define N_BACKREF      (1<< 4)
-#define N_QUANTIFIER   (1<< 5)
-#define N_EFFECT       (1<< 6)
-#define N_ANCHOR       (1<< 7)
-#define N_LIST         (1<< 8)
-#define N_ALT          (1<< 9)
-#define N_CALL         (1<<10)
+#define NT_STR         0
+#define NT_CCLASS      1
+#define NT_CTYPE       2
+#define NT_CANY        3
+#define NT_BREF        4
+#define NT_QTFR        5
+#define NT_ENCLOSE     6
+#define NT_ANCHOR      7
+#define NT_LIST        8
+#define NT_ALT         9
+#define NT_CALL       10
+
+/* node type bit */
+#define NTYPE2BIT(type)      (1<<(type))
+
+#define BIT_NT_STR        NTYPE2BIT(NT_STR)
+#define BIT_NT_CCLASS     NTYPE2BIT(NT_CCLASS)
+#define BIT_NT_CTYPE      NTYPE2BIT(NT_CTYPE)
+#define BIT_NT_CANY       NTYPE2BIT(NT_CANY)
+#define BIT_NT_BREF       NTYPE2BIT(NT_BREF)
+#define BIT_NT_QTFR       NTYPE2BIT(NT_QTFR)
+#define BIT_NT_ENCLOSE    NTYPE2BIT(NT_ENCLOSE)
+#define BIT_NT_ANCHOR     NTYPE2BIT(NT_ANCHOR)
+#define BIT_NT_LIST       NTYPE2BIT(NT_LIST)
+#define BIT_NT_ALT        NTYPE2BIT(NT_ALT)
+#define BIT_NT_CALL       NTYPE2BIT(NT_CALL)
 
 #define IS_NODE_TYPE_SIMPLE(type) \
-  (((type) & (N_STRING | N_CCLASS | N_CTYPE | N_ANYCHAR | N_BACKREF)) != 0)
-
-#define NTYPE(node)        ((node)->type)
-#define NCONS(node)        ((node)->u.cons)
-#define NSTRING(node)      ((node)->u.str)
-#define NCCLASS(node)      ((node)->u.cclass)
-#define NCTYPE(node)       ((node)->u.ctype)
-#define NQUANTIFIER(node)  ((node)->u.quantifier)
-#define NANCHOR(node)      ((node)->u.anchor)
-#define NBACKREF(node)     ((node)->u.backref)
-#define NEFFECT(node)      ((node)->u.effect)
-#define NCALL(node)        ((node)->u.call)
-
-#define CTYPE_WORD              (1<<0)
-#define CTYPE_NOT_WORD          (1<<1)
-#define CTYPE_WHITE_SPACE       (1<<2)
-#define CTYPE_NOT_WHITE_SPACE   (1<<3)
-#define CTYPE_DIGIT             (1<<4)
-#define CTYPE_NOT_DIGIT         (1<<5)
-#define CTYPE_XDIGIT            (1<<6)
-#define CTYPE_NOT_XDIGIT        (1<<7)
+  ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
+                       BIT_NT_CANY | BIT_NT_BREF)) != 0)
+
+#define NTYPE(node)             ((node)->u.base.type)
+#define SET_NTYPE(node, ntype)   (node)->u.base.type = (ntype)
+
+#define NSTR(node)         (&((node)->u.str))
+#define NCCLASS(node)      (&((node)->u.cclass))
+#define NCTYPE(node)       (&((node)->u.ctype))
+#define NBREF(node)        (&((node)->u.bref))
+#define NQTFR(node)        (&((node)->u.qtfr))
+#define NENCLOSE(node)     (&((node)->u.enclose))
+#define NANCHOR(node)      (&((node)->u.anchor))
+#define NCONS(node)        (&((node)->u.cons))
+#define NCALL(node)        (&((node)->u.call))
+
+#define NCAR(node)         (NCONS(node)->car)
+#define NCDR(node)         (NCONS(node)->cdr)
+
+
 
 #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
 #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
 
-#define EFFECT_MEMORY           (1<<0)
-#define EFFECT_OPTION           (1<<1)
-#define EFFECT_STOP_BACKTRACK   (1<<2)
+#define ENCLOSE_MEMORY           (1<<0)
+#define ENCLOSE_OPTION           (1<<1)
+#define ENCLOSE_STOP_BACKTRACK   (1<<2)
 
 #define NODE_STR_MARGIN         16
 #define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
 
 #define NSTR_RAW                (1<<0) /* by backslashed number */
 #define NSTR_AMBIG              (1<<1)
-#define NSTR_AMBIG_REDUCE       (1<<2)
+#define NSTR_DONT_GET_OPT_INFO  (1<<2)
 
 #define NSTRING_LEN(node)             ((node)->u.str.end - (node)->u.str.s)
 #define NSTRING_SET_RAW(node)          (node)->u.str.flag |= NSTR_RAW
 #define NSTRING_CLEAR_RAW(node)        (node)->u.str.flag &= ~NSTR_RAW
 #define NSTRING_SET_AMBIG(node)        (node)->u.str.flag |= NSTR_AMBIG
-#define NSTRING_SET_AMBIG_REDUCE(node) (node)->u.str.flag |= NSTR_AMBIG_REDUCE
+#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
+  (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
 #define NSTRING_IS_RAW(node)          (((node)->u.str.flag & NSTR_RAW)   != 0)
 #define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & NSTR_AMBIG) != 0)
-#define NSTRING_IS_AMBIG_REDUCE(node) \
-  (((node)->u.str.flag & NSTR_AMBIG_REDUCE) != 0)
+#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
+  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
 
 #define BACKREFS_P(br) \
   (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
 #define NQ_TARGET_IS_EMPTY_MEM    2
 #define NQ_TARGET_IS_EMPTY_REC    3
 
-
-typedef struct {
-  UChar* s;
-  UChar* end;
-  unsigned int flag;
-  int    capa;    /* (allocated size - 1) or 0: use buf[] */
-  UChar  buf[NODE_STR_BUF_SIZE];
-} StrNode;
-
-/* move to regint.h */
-#if 0
-typedef struct {
-  int    flags;
-  BitSet bs;
-  BBuf*  mbuf;     /* multi-byte info or NULL */
-} CClassNode;
-#endif
-
-typedef struct {
-  int state;
-  struct _Node* target;
-  int lower;
-  int upper;
-  int greedy;
-  int target_empty_info;
-  struct _Node* head_exact;
-  struct _Node* next_head_exact;
-  int is_refered;     /* include called node. don't eliminate even if {0} */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-  int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */
-#endif
-} QuantifierNode;
-
 /* status bits */
 #define NST_MIN_FIXED             (1<<0)
 #define NST_MAX_FIXED             (1<<1)
@@ -150,105 +132,142 @@ typedef struct {
 #define NST_NEST_LEVEL            (1<<13)
 #define NST_BY_NUMBER             (1<<14) /* {n,m} */
 
-#define SET_EFFECT_STATUS(node,f)      (node)->u.effect.state |=  (f)
-#define CLEAR_EFFECT_STATUS(node,f)    (node)->u.effect.state &= ~(f)
-
-#define IS_EFFECT_CALLED(en)           (((en)->state & NST_CALLED)        != 0)
-#define IS_EFFECT_ADDR_FIXED(en)       (((en)->state & NST_ADDR_FIXED)    != 0)
-#define IS_EFFECT_RECURSION(en)        (((en)->state & NST_RECURSION)     != 0)
-#define IS_EFFECT_MARK1(en)            (((en)->state & NST_MARK1)         != 0)
-#define IS_EFFECT_MARK2(en)            (((en)->state & NST_MARK2)         != 0)
-#define IS_EFFECT_MIN_FIXED(en)        (((en)->state & NST_MIN_FIXED)     != 0)
-#define IS_EFFECT_MAX_FIXED(en)        (((en)->state & NST_MAX_FIXED)     != 0)
-#define IS_EFFECT_CLEN_FIXED(en)       (((en)->state & NST_CLEN_FIXED)    != 0)
-#define IS_EFFECT_STOP_BT_SIMPLE_REPEAT(en) \
+#define SET_ENCLOSE_STATUS(node,f)      (node)->u.enclose.state |=  (f)
+#define CLEAR_ENCLOSE_STATUS(node,f)    (node)->u.enclose.state &= ~(f)
+
+#define IS_ENCLOSE_CALLED(en)          (((en)->state & NST_CALLED)        != 0)
+#define IS_ENCLOSE_ADDR_FIXED(en)      (((en)->state & NST_ADDR_FIXED)    != 0)
+#define IS_ENCLOSE_RECURSION(en)       (((en)->state & NST_RECURSION)     != 0)
+#define IS_ENCLOSE_MARK1(en)           (((en)->state & NST_MARK1)         != 0)
+#define IS_ENCLOSE_MARK2(en)           (((en)->state & NST_MARK2)         != 0)
+#define IS_ENCLOSE_MIN_FIXED(en)       (((en)->state & NST_MIN_FIXED)     != 0)
+#define IS_ENCLOSE_MAX_FIXED(en)       (((en)->state & NST_MAX_FIXED)     != 0)
+#define IS_ENCLOSE_CLEN_FIXED(en)      (((en)->state & NST_CLEN_FIXED)    != 0)
+#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
     (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
-#define IS_EFFECT_NAMED_GROUP(en)      (((en)->state & NST_NAMED_GROUP)   != 0)
+#define IS_ENCLOSE_NAMED_GROUP(en)     (((en)->state & NST_NAMED_GROUP)   != 0)
 
 #define SET_CALL_RECURSION(node)       (node)->u.call.state |= NST_RECURSION
 #define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
 #define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
 #define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
 #define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0)
-#define IS_QUANTIFIER_IN_REPEAT(qn)     (((qn)->state & NST_IN_REPEAT)  != 0)
-#define IS_QUANTIFIER_BY_NUMBER(qn)     (((qn)->state & NST_BY_NUMBER)  != 0)
+#define IS_QUANTIFIER_IN_REPEAT(qn)    (((qn)->state & NST_IN_REPEAT)  != 0)
+#define IS_QUANTIFIER_BY_NUMBER(qn)    (((qn)->state & NST_BY_NUMBER)  != 0)
+
+#define CALLNODE_REFNUM_UNDEF  -1
 
 typedef struct {
+  NodeBase base;
+  UChar* s;           /* start of the string bytes */
+  UChar* end;         /* end of the string bytes; NSTRING_LEN() is end - s */
+  unsigned int flag;  /* NSTR_* bits */
+  int    capa;    /* (allocated size - 1) or 0: use buf[] */
+  UChar  buf[NODE_STR_BUF_SIZE];  /* inline storage, used when capa is 0 */
+} StrNode;
+
+typedef struct {
+  NodeBase base;
+  int state;              /* NST_* status bits */
+  struct _Node* target;   /* node being repeated */
+  int lower;              /* lower repeat bound */
+  int upper;              /* upper repeat bound */
+  int greedy;             /* set from the repeat token's greedy field */
+  int target_empty_info;
+  struct _Node* head_exact;
+  struct _Node* next_head_exact;
+  int is_refered;     /* include called node. don't eliminate even if {0} */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */
+#endif
+} QtfrNode;
+
+typedef struct {
+  NodeBase base;
   int state;
   int type;
   int regnum;
   OnigOptionType option;
-  struct _Node* target;
-  AbsAddrType call_addr;
+  struct _Node*  target;
+  AbsAddrType    call_addr;
   /* for multiple call reference */
   OnigDistance min_len; /* min length (byte) */
   OnigDistance max_len; /* max length (byte) */ 
-  int char_len;        /* character length  */
-  int opt_count;       /* referenced count in optimize_node_left() */
-} EffectNode;
-
-#define CALLNODE_REFNUM_UNDEF  -1
+  int char_len;         /* character length  */
+  int opt_count;        /* referenced count in optimize_node_left() */
+} EncloseNode;
 
 #ifdef USE_SUBEXP_CALL
 
 typedef struct {
-  int offset;
+  int           offset;
   struct _Node* target;
 } UnsetAddr;
 
 typedef struct {
-  int num;
-  int alloc;
+  int        num;
+  int        alloc;
   UnsetAddr* us;
 } UnsetAddrList;
 
 typedef struct {
+  NodeBase base;
   int     state;
-  int     ref_num;
+  int     group_num;
   UChar*  name;
   UChar*  name_end;
-  struct _Node* target;  /* EffectNode : EFFECT_MEMORY */
+  struct _Node*  target;  /* EncloseNode : ENCLOSE_MEMORY */
   UnsetAddrList* unset_addr_list;
 } CallNode;
 
 #endif
 
 typedef struct {
-  int     state;
-  int     back_num;
-  int     back_static[NODE_BACKREFS_SIZE];
-  int*    back_dynamic;
-  int     nest_level;
-} BackrefNode;
+  NodeBase base;
+  int  state;
+  int  back_num;
+  int  back_static[NODE_BACKREFS_SIZE];
+  int* back_dynamic;
+  int  nest_level;
+} BRefNode;
 
 typedef struct {
+  NodeBase base;
   int type;
   struct _Node* target;
   int char_len;
 } AnchorNode;
 
+typedef struct {
+  NodeBase base;
+  struct _Node* car;  /* accessed through NCAR(); the element of this cell */
+  struct _Node* cdr;  /* accessed through NCDR(); next cell, NULL at the tail */
+} ConsAltNode;
+
+typedef struct {
+  NodeBase base;
+  int ctype;  /* NOTE(review): presumably an ONIGENC_CTYPE_* value - confirm */
+  int not;    /* NOTE(review): presumably non-zero for the negated type - confirm */
+} CtypeNode;
+
 typedef struct _Node {
-  int type;
   union {
-    StrNode        str;
-    CClassNode     cclass;
-    QuantifierNode quantifier;
-    EffectNode     effect;
+    NodeBase     base;
+    StrNode      str;
+    CClassNode   cclass;
+    QtfrNode     qtfr;
+    EncloseNode  enclose;
+    BRefNode     bref;
+    AnchorNode   anchor;
+    ConsAltNode  cons;
+    CtypeNode    ctype;
 #ifdef USE_SUBEXP_CALL
-    CallNode       call;
+    CallNode     call;
 #endif
-    BackrefNode    backref;
-    AnchorNode     anchor;
-    struct {
-      struct _Node* left;
-      struct _Node* right;
-    } cons;
-    struct {
-      int type;
-    } ctype;
   } u;
 } Node;
 
+
 #define NULL_NODE  ((Node* )0)
 
 #define SCANENV_MEMNODES_SIZE               8
@@ -257,30 +276,30 @@ typedef struct _Node {
     (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
 
 typedef struct {
-  OnigOptionType  option;
-  OnigAmbigType   ambig_flag;
-  OnigEncoding    enc;
-  OnigSyntaxType* syntax;
-  BitStatusType   capture_history;
-  BitStatusType   bt_mem_start;
-  BitStatusType   bt_mem_end;
-  BitStatusType   backrefed_mem;
-  UChar*          pattern;
-  UChar*          pattern_end;
-  UChar*          error;
-  UChar*          error_end;
-  regex_t*        reg;       /* for reg->names only */
-  int             num_call;
+  OnigOptionType   option;
+  OnigCaseFoldType case_fold_flag;
+  OnigEncoding     enc;
+  OnigSyntaxType*  syntax;
+  BitStatusType    capture_history;
+  BitStatusType    bt_mem_start;
+  BitStatusType    bt_mem_end;
+  BitStatusType    backrefed_mem;
+  UChar*           pattern;
+  UChar*           pattern_end;
+  UChar*           error;
+  UChar*           error_end;
+  regex_t*         reg;       /* for reg->names only */
+  int              num_call;
 #ifdef USE_SUBEXP_CALL
-  UnsetAddrList*  unset_addr_list;
+  UnsetAddrList*   unset_addr_list;
 #endif
-  int             num_mem;
+  int              num_mem;
 #ifdef USE_NAMED_GROUP
-  int             num_named;
+  int              num_named;
 #endif
-  int             mem_alloc;
-  Node*           mem_nodes_static[SCANENV_MEMNODES_SIZE];
-  Node**          mem_nodes_dynamic;
+  int              mem_alloc;
+  Node*            mem_nodes_static[SCANENV_MEMNODES_SIZE];
+  Node**           mem_nodes_dynamic;
 #ifdef USE_COMBINATION_EXPLOSION_CHECK
   int num_comb_exp_check;
   int comb_exp_max_regnum;
@@ -294,7 +313,6 @@ typedef struct {
 #define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2 & (opm)) != 0)
 #define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
 
-
 #ifdef USE_NAMED_GROUP
 typedef struct {
   int new_val;
@@ -304,20 +322,25 @@ extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
 #endif
 
 extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
+extern void   onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
 extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
 extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
 extern void   onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
 extern void   onig_node_conv_to_str_node P_((Node* node, int raw));
 extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
+extern int    onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
 extern void   onig_node_free P_((Node* node));
-extern Node*  onig_node_new_effect P_((int type));
+extern Node*  onig_node_new_enclose P_((int type));
 extern Node*  onig_node_new_anchor P_((int type));
 extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));
 extern Node*  onig_node_new_list P_((Node* left, Node* right));
+extern Node*  onig_node_list_add P_((Node* list, Node* x));
+extern Node*  onig_node_new_alt P_((Node* left, Node* right));
 extern void   onig_node_str_clear P_((Node* node));
 extern int    onig_free_node_list P_((void));
 extern int    onig_names_free P_((regex_t* reg));
 extern int    onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
+extern int    onig_free_shared_cclass_table P_((void));
 
 #ifdef ONIG_DEBUG
 #ifdef USE_NAMED_GROUP
index e54b5c4089e6da3d01b28f5af6a7bbc4c796ccbe..56f75abfc1351b026f25ed769807056d2e124df7 100644 (file)
@@ -2,7 +2,7 @@
   regposerr.c - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 # include <strings.h>
 #endif
 
+/* ARG_UNUSED: expands to GCC's unused attribute so a parameter that is
+ * intentionally not referenced can be tagged as such; expands to nothing
+ * on compilers that do not define __GNUC__. */
+#if defined(__GNUC__)
+#  define ARG_UNUSED  __attribute__ ((unused))
+#else
+#  define ARG_UNUSED
+#endif
+
 static char* ESTRING[] = {
   NULL,
   "failed to match",                         /* REG_NOMATCH    */
@@ -63,13 +69,15 @@ static char* ESTRING[] = {
 
 
 extern size_t
-regerror(int posix_ecode, const regex_t* reg, char* buf, size_t size)
+regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
+        size_t size)
 {
   char* s;
   char tbuf[35];
   size_t len;
 
-  if (posix_ecode > 0 && posix_ecode < sizeof(ESTRING) / sizeof(ESTRING[0])) {
+  if (posix_ecode > 0
+      && posix_ecode < (int )(sizeof(ESTRING) / sizeof(ESTRING[0]))) {
     s = ESTRING[posix_ecode];
   }
   else if (posix_ecode == 0) {
index a3bacf722e8638ceb6f4386922bd2ebe720369d0..7d1857cf2d4cb71f9a9925031c88438f8a8d3125 100644 (file)
@@ -2,7 +2,7 @@
   regposix.c - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -102,7 +102,7 @@ onig2posix_error_code(int code)
     { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED,       REG_BADPAT  },
     { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE,                    REG_EONIG_BADWC },
     { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE,                   REG_EONIG_BADWC },
-    { ONIGERR_INVALID_WIDE_CHAR_VALUE,                    REG_EONIG_BADWC },
+    { ONIGERR_INVALID_CODE_POINT_VALUE,                   REG_EONIG_BADWC },
     { ONIGERR_EMPTY_GROUP_NAME,                           REG_BADPAT },
     { ONIGERR_INVALID_GROUP_NAME,                         REG_BADPAT },
     { ONIGERR_INVALID_CHAR_IN_GROUP_NAME,                 REG_BADPAT },
@@ -122,7 +122,7 @@ onig2posix_error_code(int code)
 
   if (code >= 0) return 0;
 
-  for (i = 0; i < sizeof(o2p) / sizeof(o2p[0]); i++) {
+  for (i = 0; i < (int )(sizeof(o2p) / sizeof(o2p[0])); i++) {
     if (code == o2p[i].onig_err)
       return o2p[i].posix_err;
   }
@@ -273,9 +273,9 @@ typedef struct {
   void* arg;
 } i_wrap;
 
-static int i_wrapper(const unsigned char* name, const unsigned char* name_end,
-                    int ng, int* gs,
-                    onig_regex_t* reg, void* arg)
+static int
+i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
+         onig_regex_t* reg ARG_UNUSED, void* arg)
 {
   i_wrap* warg = (i_wrap* )arg;
 
index 9114e39e6b3f3f8fd181aea249c4196233ae8edb..ade5b55f772a870d6859e3110b1ebee1ce5ef3ae 100644 (file)
@@ -34,6 +34,15 @@ OnigSyntaxType OnigSyntaxASIS = {
   , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE
   , 0
   , ONIG_OPTION_NONE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxPosixBasic = {
@@ -42,6 +51,15 @@ OnigSyntaxType OnigSyntaxPosixBasic = {
   , 0
   , 0
   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxPosixExtended = {
@@ -54,6 +72,15 @@ OnigSyntaxType OnigSyntaxPosixExtended = {
       ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP |
       ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
   , ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_MULTILINE )
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxEmacs = {
@@ -66,6 +93,15 @@ OnigSyntaxType OnigSyntaxEmacs = {
   , ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
   , ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
   , ONIG_OPTION_NONE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxGrep = {
@@ -79,6 +115,15 @@ OnigSyntaxType OnigSyntaxGrep = {
   , 0
   , ( ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC | ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC )
   , ONIG_OPTION_NONE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxGnuRegex = {
@@ -86,6 +131,15 @@ OnigSyntaxType OnigSyntaxGnuRegex = {
   , 0
   , SYN_GNU_REGEX_BV
   , ONIG_OPTION_NONE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxJava = {
@@ -100,6 +154,15 @@ OnigSyntaxType OnigSyntaxJava = {
       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY )
   , ( SYN_GNU_REGEX_BV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND )
   , ONIG_OPTION_SINGLELINE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 OnigSyntaxType OnigSyntaxPerl = {
@@ -111,10 +174,18 @@ OnigSyntaxType OnigSyntaxPerl = {
   , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
-      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
-      ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS )
+      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT )
   , SYN_GNU_REGEX_BV
   , ONIG_OPTION_SINGLELINE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 /* Perl + named group */
@@ -128,7 +199,6 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
       ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
       ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
       ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
-      ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    |
       ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |
       ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        |
       ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
@@ -136,6 +206,15 @@ OnigSyntaxType OnigSyntaxPerl_NG = {
       ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
       ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
   , ONIG_OPTION_SINGLELINE
+  ,
+  {
+      (OnigCodePoint )'\\'                       /* esc */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*'  */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
+    , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
+  }
 };
 
 
@@ -205,7 +284,7 @@ onig_get_syntax_options(OnigSyntaxType* syntax)
 }
 
 #ifdef USE_VARIABLE_META_CHARS
-extern int onig_set_meta_char(OnigEncoding enc,
+extern int onig_set_meta_char(OnigSyntaxType* enc,
                               unsigned int what, OnigCodePoint code)
 {
   switch (what) {
index 5fad0cc18c337c2212cea9922c140b75c8ba6e99..113fbaedc64494141cfa1912a5f88da61dcc3919 100644 (file)
@@ -2,7 +2,7 @@
   regversion.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
  * SUCH DAMAGE.
  */
 
+#include "config.h"
 #include "oniguruma.h"
 #include <stdio.h>
 
@@ -47,7 +48,7 @@ onig_copyright(void)
 {
   static char s[58];
 
-  sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako",
+  sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2008 K.Kosako",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
index 2324da263515f48e31c2b7d074e3fb67d71fda46..022880ae360a4eb18dee0562aeb73634c37ed2f5 100644 (file)
@@ -2,7 +2,6 @@
 
 /* static      char    sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
 
-#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <malloc.h>
 #endif
 
-#ifdef NOT_RUBY
 #include "regint.h"
-#else
-#ifdef RUBY_PLATFORM
-#define xmalloc ruby_xmalloc
-#define xcalloc ruby_xcalloc
-#define xrealloc ruby_xrealloc
-#define xfree ruby_xfree
-
-void *xmalloc(long);
-void *xcalloc(long, long);
-void *xrealloc(void *, long);
-void xfree(void *);
-#endif
-#endif
-
 #include "st.h"
 
 typedef struct st_table_entry st_table_entry;
@@ -467,8 +451,13 @@ st_delete_safe(table, key, value, never)
 }
 
+/*
+ * Returns ST_DELETE when `value` equals the `never` marker and ST_CONTINUE
+ * otherwise.  `key` is not examined: the GCC-only prototype tags it as
+ * unused, while other compilers keep the K&R-style definition.
+ */
 static int
+#if defined(__GNUC__)
+delete_never(st_data_t key __attribute__ ((unused)), st_data_t value,
+            st_data_t never)
+#else
 delete_never(key, value, never)
     st_data_t key, value, never;
+#endif
 {
     if (value == never) return ST_DELETE;
     return ST_CONTINUE;
 }
diff --git a/ext/mbstring/oniguruma/testc.c b/ext/mbstring/oniguruma/testc.c
new file mode 100644 (file)
index 0000000..6a8c778
--- /dev/null
@@ -0,0 +1,863 @@
+/*
+ * This program was generated by testconv.rb.
+ */
+#include "config.h"
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+#include <stdio.h>
+
+#ifdef POSIX_TEST
+#include "onigposix.h"
+#else
+#include "oniguruma.h"
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+/* Length of a NUL-terminated C string; thin alias for strlen(). */
+#define SLEN(s)  strlen(s)
+
+/* Outcome counters updated by xx(): cases that behaved as expected, cases
+ * that did not, and cases where compiling or searching reported an error. */
+static int nsucc  = 0;
+static int nfail  = 0;
+static int nerror = 0;
+
+/* Stream that receives the "ERROR: ..." messages; main() sets it to stdout. */
+static FILE* err_file;
+
+#ifndef POSIX_TEST
+/* Match region passed to every onig_search() call in xx(); main() creates
+ * it with onig_region_new(). */
+static OnigRegion* region;
+#endif
+
+/*
+ * Run a single test case: compile `pattern`, search `str` with it, and
+ * compare the outcome with the expectation.
+ *
+ *   pattern  regular expression source (NUL-terminated)
+ *   str      subject string (NUL-terminated)
+ *   from,to  expected start/end offsets of group `mem`
+ *   mem      index of the group to check (0 is the whole match)
+ *   not      non-zero when the pattern is expected NOT to match
+ *
+ * OK/FAIL lines go to stdout, ERROR lines go to err_file, and one of
+ * nsucc / nfail / nerror is incremented per call.  The compiled regex is
+ * released on every path that follows a successful compile.
+ */
+static void xx(char* pattern, char* str, int from, int to, int mem, int not)
+{
+  int r;
+
+#ifdef POSIX_TEST
+  regex_t reg;
+  char buf[200];
+  /* NOTE(review): fixed capacity; assumes no test pattern has 25 or more
+   * groups, since regexec() below is asked for re_nsub + 1 entries. */
+  regmatch_t pmatch[25];
+
+  r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
+  if (r) {
+    /* Compilation failed: there is no compiled regex to release. */
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    return ;
+  }
+
+  r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
+  if (r != 0 && r != REG_NOMATCH) {
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    regfree(&reg);  /* compiled successfully above; do not leak it */
+    return ;
+  }
+
+  if (r == REG_NOMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+    else {
+      /* Matched as expected: now verify the position of group `mem`. */
+      if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
+               from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
+        nfail++;
+      }
+    }
+  }
+  regfree(&reg);
+
+#else
+  regex_t* reg;
+  OnigErrorInfo einfo;
+
+  r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
+              ONIG_OPTION_DEFAULT, ONIG_ENCODING_EUC_JP, ONIG_SYNTAX_DEFAULT, &einfo);
+  if (r) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r, &einfo);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    return ;
+  }
+
+  r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
+                 (UChar* )str, (UChar* )(str + SLEN(str)),
+                 region, ONIG_OPTION_NONE);
+  if (r < ONIG_MISMATCH) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    onig_free(reg);  /* created by onig_new() above; do not leak it */
+    return ;
+  }
+
+  if (r == ONIG_MISMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+    else {
+      /* Matched as expected: now verify the position of group `mem`. */
+      if (region->beg[mem] == from && region->end[mem] == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
+               from, to, region->beg[mem], region->end[mem]);
+        nfail++;
+      }
+    }
+  }
+  onig_free(reg);
+#endif
+}
+
+/* Expect `pattern` to match `str` with the whole match spanning from..to. */
+static void x2(char* pattern, char* str, int from, int to)
+{
+  const int whole_match = 0;   /* check group 0 */
+  const int must_match  = 0;   /* `not` flag off */
+
+  xx(pattern, str, from, to, whole_match, must_match);
+}
+
+/* Expect `pattern` to match `str` with group `mem` spanning from..to. */
+static void x3(char* pattern, char* str, int from, int to, int mem)
+{
+  const int must_match = 0;    /* `not` flag off */
+
+  xx(pattern, str, from, to, mem, must_match);
+}
+
+/* Expect `pattern` NOT to match `str`; the offsets and group are unused. */
+static void n(char* pattern, char* str)
+{
+  const int unused     = 0;    /* from / to / mem are ignored on a mismatch */
+  const int must_fail  = 1;    /* `not` flag on */
+
+  xx(pattern, str, unused, unused, unused, must_fail);
+}
+
+extern int main(int argc, char* argv[])
+{
+  err_file = stdout;
+
+#ifdef POSIX_TEST
+  reg_set_encoding(REG_POSIX_ENCODING_EUC_JP);
+#else
+  region = onig_region_new();
+#endif
+
+  x2("", "", 0, 0);
+  x2("^", "", 0, 0);
+  x2("$", "", 0, 0);
+  x2("\\G", "", 0, 0);
+  x2("\\A", "", 0, 0);
+  x2("\\Z", "", 0, 0);
+  x2("\\z", "", 0, 0);
+  x2("^$", "", 0, 0);
+  x2("\\ca", "\001", 0, 1);
+  x2("\\C-b", "\002", 0, 1);
+  x2("\\c\\\\", "\034", 0, 1);
+  x2("q[\\c\\\\]", "q\034", 0, 2);
+  x2("", "a", 0, 0);
+  x2("a", "a", 0, 1);
+  x2("\\x61", "a", 0, 1);
+  x2("aa", "aa", 0, 2);
+  x2("aaa", "aaa", 0, 3);
+  x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
+  x2("ab", "ab", 0, 2);
+  x2("b", "ab", 1, 2);
+  x2("bc", "abc", 1, 3);
+  x2("(?i:#RET#)", "#INS##RET#", 5, 10);
+  x2("\\17", "\017", 0, 1);
+  x2("\\x1f", "\x1f", 0, 1);
+  x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
+  x2("(?x)  G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
+  x2(".", "a", 0, 1);
+  n(".", "");
+  x2("..", "ab", 0, 2);
+  x2("\\w", "e", 0, 1);
+  n("\\W", "e");
+  x2("\\s", " ", 0, 1);
+  x2("\\S", "b", 0, 1);
+  x2("\\d", "4", 0, 1);
+  n("\\D", "4");
+  x2("\\b", "z ", 0, 0);
+  x2("\\b", " z", 1, 1);
+  x2("\\B", "zz ", 1, 1);
+  x2("\\B", "z ", 2, 2);
+  x2("\\B", " z", 0, 0);
+  x2("[ab]", "b", 0, 1);
+  n("[ab]", "c");
+  x2("[a-z]", "t", 0, 1);
+  n("[^a]", "a");
+  x2("[^a]", "\n", 0, 1);
+  x2("[]]", "]", 0, 1);
+  n("[^]]", "]");
+  x2("[\\^]+", "0^^1", 1, 3);
+  x2("[b-]", "b", 0, 1);
+  x2("[b-]", "-", 0, 1);
+  x2("[\\w]", "z", 0, 1);
+  n("[\\w]", " ");
+  x2("[\\W]", "b$", 1, 2);
+  x2("[\\d]", "5", 0, 1);
+  n("[\\d]", "e");
+  x2("[\\D]", "t", 0, 1);
+  n("[\\D]", "3");
+  x2("[\\s]", " ", 0, 1);
+  n("[\\s]", "a");
+  x2("[\\S]", "b", 0, 1);
+  n("[\\S]", " ");
+  x2("[\\w\\d]", "2", 0, 1);
+  n("[\\w\\d]", " ");
+  x2("[[:upper:]]", "B", 0, 1);
+  x2("[*[:xdigit:]+]", "+", 0, 1);
+  x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
+  x2("[*[:xdigit:]+]", "-@^+", 3, 4);
+  n("[[:upper]]", "A");
+  x2("[[:upper]]", ":", 0, 1);
+  x2("[\\044-\\047]", "\046", 0, 1);
+  x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
+  x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
+  n("[\\x6A-\\x6D]", "\x6E");
+  n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype ()    External    | _rb_apply");
+  x2("[\\[]", "[", 0, 1);
+  x2("[\\]]", "]", 0, 1);
+  x2("[&]", "&", 0, 1);
+  x2("[[ab]]", "b", 0, 1);
+  x2("[[ab]c]", "c", 0, 1);
+  n("[[^a]]", "a");
+  n("[^[a]]", "a");
+  x2("[[ab]&&bc]", "b", 0, 1);
+  n("[[ab]&&bc]", "a");
+  n("[[ab]&&bc]", "c");
+  x2("[a-z&&b-y&&c-x]", "w", 0, 1);
+  n("[^a-z&&b-y&&c-x]", "w");
+  x2("[[^a&&a]&&a-z]", "b", 0, 1);
+  n("[[^a&&a]&&a-z]", "a");
+  x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
+  n("[[^a-z&&bcdef]&&[^c-g]]", "c");
+  x2("[^[^abc]&&[^cde]]", "c", 0, 1);
+  x2("[^[^abc]&&[^cde]]", "e", 0, 1);
+  n("[^[^abc]&&[^cde]]", "f");
+  x2("[a-&&-a]", "-", 0, 1);
+  n("[a\\-&&\\-a]", "&");
+  n("\\wabc", " abc");
+  x2("a\\Wbc", "a bc", 0, 4);
+  x2("a.b.c", "aabbc", 0, 5);
+  x2(".\\wb\\W..c", "abb bcc", 0, 7);
+  x2("\\s\\wzzz", " zzzz", 0, 5);
+  x2("aa.b", "aabb", 0, 4);
+  n(".a", "ab");
+  x2(".a", "aa", 0, 2);
+  x2("^a", "a", 0, 1);
+  x2("^a$", "a", 0, 1);
+  x2("^\\w$", "a", 0, 1);
+  n("^\\w$", " ");
+  x2("^\\wab$", "zab", 0, 3);
+  x2("^\\wabcdef$", "zabcdef", 0, 7);
+  x2("^\\w...def$", "zabcdef", 0, 7);
+  x2("\\w\\w\\s\\Waaa\\d", "aa  aaa4", 0, 8);
+  x2("\\A\\Z", "", 0, 0);
+  x2("\\Axyz", "xyz", 0, 3);
+  x2("xyz\\Z", "xyz", 0, 3);
+  x2("xyz\\z", "xyz", 0, 3);
+  x2("a\\Z", "a", 0, 1);
+  x2("\\Gaz", "az", 0, 2);
+  n("\\Gz", "bza");
+  n("az\\G", "az");
+  n("az\\A", "az");
+  n("a\\Az", "az");
+  x2("\\^\\$", "^$", 0, 2);
+  x2("^x?y", "xy", 0, 2);
+  x2("^(x?y)", "xy", 0, 2);
+  x2("\\w", "_", 0, 1);
+  n("\\W", "_");
+  x2("(?=z)z", "z", 0, 1);
+  n("(?=z).", "a");
+  x2("(?!z)a", "a", 0, 1);
+  n("(?!z)a", "z");
+  x2("(?i:a)", "a", 0, 1);
+  x2("(?i:a)", "A", 0, 1);
+  x2("(?i:A)", "a", 0, 1);
+  n("(?i:A)", "b");
+  x2("(?i:[A-Z])", "a", 0, 1);
+  x2("(?i:[f-m])", "H", 0, 1);
+  x2("(?i:[f-m])", "h", 0, 1);
+  n("(?i:[f-m])", "e");
+  x2("(?i:[A-c])", "D", 0, 1);
+  n("(?i:[^a-z])", "A");
+  n("(?i:[^a-z])", "a");
+  x2("(?i:[!-k])", "Z", 0, 1);
+  x2("(?i:[!-k])", "7", 0, 1);
+  x2("(?i:[T-}])", "b", 0, 1);
+  x2("(?i:[T-}])", "{", 0, 1);
+  x2("(?i:\\?a)", "?A", 0, 2);
+  x2("(?i:\\*A)", "*a", 0, 2);
+  n(".", "\n");
+  x2("(?m:.)", "\n", 0, 1);
+  x2("(?m:a.)", "a\n", 0, 2);
+  x2("(?m:.b)", "a\nb", 1, 3);
+  x2(".*abc", "dddabdd\nddabc", 8, 13);
+  x2("(?m:.*abc)", "dddabddabc", 0, 10);
+  n("(?i)(?-i)a", "A");
+  n("(?i)(?-i:a)", "A");
+  x2("a?", "", 0, 0);
+  x2("a?", "b", 0, 0);
+  x2("a?", "a", 0, 1);
+  x2("a*", "", 0, 0);
+  x2("a*", "a", 0, 1);
+  x2("a*", "aaa", 0, 3);
+  x2("a*", "baaaa", 0, 0);
+  n("a+", "");
+  x2("a+", "a", 0, 1);
+  x2("a+", "aaaa", 0, 4);
+  x2("a+", "aabbb", 0, 2);
+  x2("a+", "baaaa", 1, 5);
+  x2(".?", "", 0, 0);
+  x2(".?", "f", 0, 1);
+  x2(".?", "\n", 0, 0);
+  x2(".*", "", 0, 0);
+  x2(".*", "abcde", 0, 5);
+  x2(".+", "z", 0, 1);
+  x2(".+", "zdswer\n", 0, 6);
+  x2("(.*)a\\1f", "babfbac", 0, 4);
+  x2("(.*)a\\1f", "bacbabf", 3, 7);
+  x2("((.*)a\\2f)", "bacbabf", 3, 7);
+  x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
+  x2("a|b", "a", 0, 1);
+  x2("a|b", "b", 0, 1);
+  x2("|a", "a", 0, 0);
+  x2("(|a)", "a", 0, 0);
+  x2("ab|bc", "ab", 0, 2);
+  x2("ab|bc", "bc", 0, 2);
+  x2("z(?:ab|bc)", "zbc", 0, 3);
+  x2("a(?:ab|bc)c", "aabc", 0, 4);
+  x2("ab|(?:ac|az)", "az", 0, 2);
+  x2("a|b|c", "dc", 1, 2);
+  x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
+  n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
+  x2("a|^z", "ba", 1, 2);
+  x2("a|^z", "za", 0, 1);
+  x2("a|\\Gz", "bza", 2, 3);
+  x2("a|\\Gz", "za", 0, 1);
+  x2("a|\\Az", "bza", 2, 3);
+  x2("a|\\Az", "za", 0, 1);
+  x2("a|b\\Z", "ba", 1, 2);
+  x2("a|b\\Z", "b", 0, 1);
+  x2("a|b\\z", "ba", 1, 2);
+  x2("a|b\\z", "b", 0, 1);
+  x2("\\w|\\s", " ", 0, 1);
+  n("\\w|\\w", " ");
+  x2("\\w|%", "%", 0, 1);
+  x2("\\w|[&$]", "&", 0, 1);
+  x2("[b-d]|[^e-z]", "a", 0, 1);
+  x2("(?:a|[c-f])|bz", "dz", 0, 1);
+  x2("(?:a|[c-f])|bz", "bz", 0, 2);
+  x2("abc|(?=zz)..f", "zzf", 0, 3);
+  x2("abc|(?!zz)..f", "abf", 0, 3);
+  x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
+  n("(?>a|abd)c", "abdc");
+  x2("(?>abd|a)c", "abdc", 0, 4);
+  x2("a?|b", "a", 0, 1);
+  x2("a?|b", "b", 0, 0);
+  x2("a?|b", "", 0, 0);
+  x2("a*|b", "aa", 0, 2);
+  x2("a*|b*", "ba", 0, 0);
+  x2("a*|b*", "ab", 0, 1);
+  x2("a+|b*", "", 0, 0);
+  x2("a+|b*", "bbb", 0, 3);
+  x2("a+|b*", "abbb", 0, 1);
+  n("a+|b+", "");
+  x2("(a|b)?", "b", 0, 1);
+  x2("(a|b)*", "ba", 0, 2);
+  x2("(a|b)+", "bab", 0, 3);
+  x2("(ab|ca)+", "caabbc", 0, 4);
+  x2("(ab|ca)+", "aabca", 1, 5);
+  x2("(ab|ca)+", "abzca", 0, 2);
+  x2("(a|bab)+", "ababa", 0, 5);
+  x2("(a|bab)+", "ba", 1, 2);
+  x2("(a|bab)+", "baaaba", 1, 4);
+  x2("(?:a|b)(?:a|b)", "ab", 0, 2);
+  x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
+  x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
+  x2("(?:a+|b+){2}", "aaabbb", 0, 6);
+  x2("h{0,}", "hhhh", 0, 4);
+  x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
+  n("ax{2}*a", "0axxxa1");
+  n("a.{0,2}a", "0aXXXa0");
+  n("a.{0,2}?a", "0aXXXa0");
+  n("a.{0,2}?a", "0aXXXXa0");
+  x2("^a{2,}?a$", "aaa", 0, 3);
+  x2("^[a-z]{2,}?$", "aaa", 0, 3);
+  x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
+  n("(?:a+|\\Ab*)cc", "abcc");
+  x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
+  x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
+  x2("a|(?i)c", "C", 0, 1);
+  x2("(?i)c|a", "C", 0, 1);
+  x2("(?i)c|a", "A", 0, 1);
+  x2("(?i:c)|a", "C", 0, 1);
+  n("(?i:c)|a", "A");
+  x2("[abc]?", "abc", 0, 1);
+  x2("[abc]*", "abc", 0, 3);
+  x2("[^abc]*", "abc", 0, 0);
+  n("[^abc]+", "abc");
+  x2("a?\?", "aaa", 0, 0);
+  x2("ba?\?b", "bab", 0, 3);
+  x2("a*?", "aaa", 0, 0);
+  x2("ba*?", "baa", 0, 1);
+  x2("ba*?b", "baab", 0, 4);
+  x2("a+?", "aaa", 0, 1);
+  x2("ba+?", "baa", 0, 2);
+  x2("ba+?b", "baab", 0, 4);
+  x2("(?:a?)?\?", "a", 0, 0);
+  x2("(?:a?\?)?", "a", 0, 0);
+  x2("(?:a?)+?", "aaa", 0, 1);
+  x2("(?:a+)?\?", "aaa", 0, 0);
+  x2("(?:a+)?\?b", "aaab", 0, 4);
+  x2("(?:ab)?{2}", "", 0, 0);
+  x2("(?:ab)?{2}", "ababa", 0, 4);
+  x2("(?:ab)*{0}", "ababa", 0, 0);
+  x2("(?:ab){3,}", "abababab", 0, 8);
+  n("(?:ab){3,}", "abab");
+  x2("(?:ab){2,4}", "ababab", 0, 6);
+  x2("(?:ab){2,4}", "ababababab", 0, 8);
+  x2("(?:ab){2,4}?", "ababababab", 0, 4);
+  x2("(?:ab){,}", "ab{,}", 0, 5);
+  x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
+  x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
+  x2("(d+)([^abc]z)", "dddz", 0, 4);
+  x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
+  x2("(\\w+)(\\wz)", "dddz", 0, 4);
+  x3("(a)", "a", 0, 1, 1);
+  x3("(ab)", "ab", 0, 2, 1);
+  x2("((ab))", "ab", 0, 2);
+  x3("((ab))", "ab", 0, 2, 1);
+  x3("((ab))", "ab", 0, 2, 2);
+  x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
+  x3("(ab)(cd)", "abcd", 0, 2, 1);
+  x3("(ab)(cd)", "abcd", 2, 4, 2);
+  x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
+  x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
+  x2("(^a)", "a", 0, 1);
+  x3("(a)|(a)", "ba", 1, 2, 1);
+  x3("(^a)|(a)", "ba", 1, 2, 2);
+  x3("(a?)", "aaa", 0, 1, 1);
+  x3("(a*)", "aaa", 0, 3, 1);
+  x3("(a*)", "", 0, 0, 1);
+  x3("(a+)", "aaaaaaa", 0, 7, 1);
+  x3("(a+|b*)", "bbbaa", 0, 3, 1);
+  x3("(a+|b?)", "bbbaa", 0, 1, 1);
+  x3("(abc)?", "abc", 0, 3, 1);
+  x3("(abc)*", "abc", 0, 3, 1);
+  x3("(abc)+", "abc", 0, 3, 1);
+  x3("(xyz|abc)+", "abc", 0, 3, 1);
+  x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
+  x3("((?i:abc))", "AbC", 0, 3, 1);
+  x2("(abc)(?i:\\1)", "abcABC", 0, 6);
+  x3("((?m:a.c))", "a\nc", 0, 3, 1);
+  x3("((?=az)a)", "azb", 0, 1, 1);
+  x3("abc|(.abd)", "zabd", 0, 4, 1);
+  x2("(?:abc)|(ABC)", "abc", 0, 3);
+  x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
+  x3("a*(.)", "aaaaz", 4, 5, 1);
+  x3("a*?(.)", "aaaaz", 0, 1, 1);
+  x3("a*?(c)", "aaaac", 4, 5, 1);
+  x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
+  x3("(\\Abb)cc", "bbcc", 0, 2, 1);
+  n("(\\Abb)cc", "zbbcc");
+  x3("(^bb)cc", "bbcc", 0, 2, 1);
+  n("(^bb)cc", "zbbcc");
+  x3("cc(bb$)", "ccbb", 2, 4, 1);
+  n("cc(bb$)", "ccbbb");
+  n("(\\1)", "");
+  n("\\1(a)", "aa");
+  n("(a(b)\\1)\\2+", "ababb");
+  n("(?:(?:\\1|z)(a))+$", "zaa");
+  x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
+  x2("(a)(?=\\1)", "aa", 0, 1);
+  n("(a)$|\\1", "az");
+  x2("(a)\\1", "aa", 0, 2);
+  n("(a)\\1", "ab");
+  x2("(a?)\\1", "aa", 0, 2);
+  x2("(a?\?)\\1", "aa", 0, 0);
+  x2("(a*)\\1", "aaaaa", 0, 4);
+  x3("(a*)\\1", "aaaaa", 0, 2, 1);
+  x2("a(b*)\\1", "abbbb", 0, 5);
+  x2("a(b*)\\1", "ab", 0, 1);
+  x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
+  x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
+  x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
+  x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
+  x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
+  x2("([a-d])\\1", "cc", 0, 2);
+  x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
+  n("(\\w\\d\\s)\\1", "f5 f5");
+  x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
+  x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
+  x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
+  x2("(^a)\\1", "aa", 0, 2);
+  n("(^a)\\1", "baa");
+  n("(a$)\\1", "aa");
+  n("(ab\\Z)\\1", "ab");
+  x2("(a*\\Z)\\1", "a", 1, 1);
+  x2(".(a*\\Z)\\1", "ba", 1, 2);
+  x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
+  x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
+  x2("((?i:az))\\1", "AzAz", 0, 4);
+  n("((?i:az))\\1", "Azaz");
+  x2("(?<=a)b", "ab", 1, 2);
+  n("(?<=a)b", "bb");
+  x2("(?<=a|b)b", "bb", 1, 2);
+  x2("(?<=a|bc)b", "bcb", 2, 3);
+  x2("(?<=a|bc)b", "ab", 1, 2);
+  x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
+  x2("(a)\\g<1>", "aa", 0, 2);
+  x2("(?<!a)b", "cb", 1, 2);
+  n("(?<!a)b", "ab");
+  x2("(?<!a|bc)b", "bbb", 0, 1);
+  n("(?<!a|bc)z", "bcz");
+  x2("(?<name1>a)", "a", 0, 1);
+  x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
+  x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
+  x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
+  x2("(?<n>|a\\g<n>)+", "", 0, 0);
+  x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
+  x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
+  x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
+  x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
+  x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
+  x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", "  fg xaaaaaaaafg x", 2, 18);
+  x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
+  x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
+  x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
+  x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
+  x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
+  n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
+  x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
+  x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
+  x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
+  x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
+  x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
+  x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
+  x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
+  x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
+  x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
+  x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND  (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
+  x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
+  x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
+  x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
+  x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
+  x2("()*\\1", "", 0, 0);
+  x2("(?:()|())*\\1\\2", "", 0, 0);
+  x3("(?:\\1a|())*", "a", 0, 0, 1);
+  x2("x((.)*)*x", "0x1x2x3", 1, 6);
+  x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
+  x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
+  x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
+  x2("\\xED\\xF2", "\xed\xf2", 0, 2);
+  x2("", "¤¢", 0, 0);
+  x2("¤¢", "¤¢", 0, 2);
+  n("¤¤", "¤¢");
+  x2("¤¦¤¦", "¤¦¤¦", 0, 4);
+  x2("¤¢¤¤¤¦", "¤¢¤¤¤¦", 0, 6);
+  x2("¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", "¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³¤³", 0, 70);
+  x2("¤¢", "¤¤¤¢", 2, 4);
+  x2("¤¤¤¦", "¤¢¤¤¤¦", 2, 6);
+  x2("\\xca\\xb8", "\xca\xb8", 0, 2);
+  x2(".", "¤¢", 0, 2);
+  x2("..", "¤«¤­", 0, 4);
+  x2("\\w", "¤ª", 0, 2);
+  n("\\W", "¤¢");
+  x2("[\\W]", "¤¦$", 2, 3);
+  x2("\\S", "¤½", 0, 2);
+  x2("\\S", "´Á", 0, 2);
+  x2("\\b", "µ¤ ", 0, 0);
+  x2("\\b", " ¤Û", 1, 1);
+  x2("\\B", "¤»¤½ ", 2, 2);
+  x2("\\B", "¤¦ ", 3, 3);
+  x2("\\B", " ¤¤", 0, 0);
+  x2("[¤¿¤Á]", "¤Á", 0, 2);
+  n("[¤Ê¤Ë]", "¤Ì");
+  x2("[¤¦-¤ª]", "¤¨", 0, 2);
+  n("[^¤±]", "¤±");
+  x2("[\\w]", "¤Í", 0, 2);
+  n("[\\d]", "¤Õ");
+  x2("[\\D]", "¤Ï", 0, 2);
+  n("[\\s]", "¤¯");
+  x2("[\\S]", "¤Ø", 0, 2);
+  x2("[\\w\\d]", "¤è", 0, 2);
+  x2("[\\w\\d]", "   ¤è", 3, 5);
+  n("\\wµ´¼Ö", " µ´¼Ö");
+  x2("µ´\\W¼Ö", "µ´ ¼Ö", 0, 5);
+  x2("¤¢.¤¤.¤¦", "¤¢¤¢¤¤¤¤¤¦", 0, 10);
+  x2(".\\w¤¦\\W..¤¾", "¤¨¤¦¤¦ ¤¦¤¾¤¾", 0, 13);
+  x2("\\s\\w¤³¤³¤³", " ¤³¤³¤³¤³", 0, 9);
+  x2("¤¢¤¢.¤±", "¤¢¤¢¤±¤±", 0, 8);
+  n(".¤¤", "¤¤¤¨");
+  x2(".¤ª", "¤ª¤ª", 0, 4);
+  x2("^¤¢", "¤¢", 0, 2);
+  x2("^¤à$", "¤à", 0, 2);
+  x2("^\\w$", "¤Ë", 0, 2);
+  x2("^\\w¤«¤­¤¯¤±¤³$", "z¤«¤­¤¯¤±¤³", 0, 11);
+  x2("^\\w...¤¦¤¨¤ª$", "z¤¢¤¤¤¦¤¦¤¨¤ª", 0, 13);
+  x2("\\w\\w\\s\\W¤ª¤ª¤ª\\d", "a¤ª  ¤ª¤ª¤ª4", 0, 12);
+  x2("\\A¤¿¤Á¤Ä", "¤¿¤Á¤Ä", 0, 6);
+  x2("¤à¤á¤â\\Z", "¤à¤á¤â", 0, 6);
+  x2("¤«¤­¤¯\\z", "¤«¤­¤¯", 0, 6);
+  x2("¤«¤­¤¯\\Z", "¤«¤­¤¯\n", 0, 6);
+  x2("\\G¤Ý¤Ô", "¤Ý¤Ô", 0, 4);
+  n("\\G¤¨", "¤¦¤¨¤ª");
+  n("¤È¤Æ\\G", "¤È¤Æ");
+  n("¤Þ¤ß\\A", "¤Þ¤ß");
+  n("¤Þ\\A¤ß", "¤Þ¤ß");
+  x2("(?=¤»)¤»", "¤»", 0, 2);
+  n("(?=¤¦).", "¤¤");
+  x2("(?!¤¦)¤«", "¤«", 0, 2);
+  n("(?!¤È)¤¢", "¤È");
+  x2("(?i:¤¢)", "¤¢", 0, 2);
+  x2("(?i:¤Ö¤Ù)", "¤Ö¤Ù", 0, 4);
+  n("(?i:¤¤)", "¤¦");
+  x2("(?m:¤è.)", "¤è\n", 0, 3);
+  x2("(?m:.¤á)", "¤Þ\n¤á", 2, 5);
+  x2("¤¢?", "", 0, 0);
+  x2("ÊÑ?", "²½", 0, 0);
+  x2("ÊÑ?", "ÊÑ", 0, 2);
+  x2("ÎÌ*", "", 0, 0);
+  x2("ÎÌ*", "ÎÌ", 0, 2);
+  x2("»Ò*", "»Ò»Ò»Ò", 0, 6);
+  x2("ÇÏ*", "¼¯ÇÏÇÏÇÏÇÏ", 0, 0);
+  n("»³+", "");
+  x2("²Ï+", "²Ï", 0, 2);
+  x2("»þ+", "»þ»þ»þ»þ", 0, 8);
+  x2("¤¨+", "¤¨¤¨¤¦¤¦¤¦", 0, 4);
+  x2("¤¦+", "¤ª¤¦¤¦¤¦¤¦", 2, 10);
+  x2(".?", "¤¿", 0, 2);
+  x2(".*", "¤Ñ¤Ô¤×¤Ú", 0, 8);
+  x2(".+", "¤í", 0, 2);
+  x2(".+", "¤¤¤¦¤¨¤«\n", 0, 8);
+  x2("¤¢|¤¤", "¤¢", 0, 2);
+  x2("¤¢|¤¤", "¤¤", 0, 2);
+  x2("¤¢¤¤|¤¤¤¦", "¤¢¤¤", 0, 4);
+  x2("¤¢¤¤|¤¤¤¦", "¤¤¤¦", 0, 4);
+  x2("¤ò(?:¤«¤­|¤­¤¯)", "¤ò¤«¤­", 0, 6);
+  x2("¤ò(?:¤«¤­|¤­¤¯)¤±", "¤ò¤­¤¯¤±", 0, 8);
+  x2("¤¢¤¤|(?:¤¢¤¦|¤¢¤ò)", "¤¢¤ò", 0, 4);
+  x2("¤¢|¤¤|¤¦", "¤¨¤¦", 2, 4);
+  x2("¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤·¤¹¤»", 0, 6);
+  n("¤¢|¤¤|¤¦¤¨|¤ª¤«¤­|¤¯|¤±¤³¤µ|¤·¤¹¤»|¤½|¤¿¤Á|¤Ä¤Æ¤È¤Ê¤Ë|¤Ì¤Í", "¤¹¤»");
+  x2("¤¢|^¤ï", "¤Ö¤¢", 2, 4);
+  x2("¤¢|^¤ò", "¤ò¤¢", 0, 2);
+  x2("µ´|\\G¼Ö", "¤±¼Öµ´", 4, 6);
+  x2("µ´|\\G¼Ö", "¼Öµ´", 0, 2);
+  x2("µ´|\\A¼Ö", "b¼Öµ´", 3, 5);
+  x2("µ´|\\A¼Ö", "¼Ö", 0, 2);
+  x2("µ´|¼Ö\\Z", "¼Öµ´", 2, 4);
+  x2("µ´|¼Ö\\Z", "¼Ö", 0, 2);
+  x2("µ´|¼Ö\\Z", "¼Ö\n", 0, 2);
+  x2("µ´|¼Ö\\z", "¼Öµ´", 2, 4);
+  x2("µ´|¼Ö\\z", "¼Ö", 0, 2);
+  x2("\\w|\\s", "¤ª", 0, 2);
+  x2("\\w|%", "%¤ª", 0, 1);
+  x2("\\w|[&$]", "¤¦&", 0, 2);
+  x2("[¤¤-¤±]", "¤¦", 0, 2);
+  x2("[¤¤-¤±]|[^¤«-¤³]", "¤¢", 0, 2);
+  x2("[¤¤-¤±]|[^¤«-¤³]", "¤«", 0, 2);
+  x2("[^¤¢]", "\n", 0, 1);
+  x2("(?:¤¢|[¤¦-¤­])|¤¤¤ò", "¤¦¤ò", 0, 2);
+  x2("(?:¤¢|[¤¦-¤­])|¤¤¤ò", "¤¤¤ò", 0, 4);
+  x2("¤¢¤¤¤¦|(?=¤±¤±)..¤Û", "¤±¤±¤Û", 0, 6);
+  x2("¤¢¤¤¤¦|(?!¤±¤±)..¤Û", "¤¢¤¤¤Û", 0, 6);
+  x2("(?=¤ò¤¢)..¤¢|(?=¤ò¤ò)..¤¢", "¤ò¤ò¤¢", 0, 6);
+  x2("(?<=¤¢|¤¤¤¦)¤¤", "¤¤¤¦¤¤", 4, 6);
+  n("(?>¤¢|¤¢¤¤¤¨)¤¦", "¤¢¤¤¤¨¤¦");
+  x2("(?>¤¢¤¤¤¨|¤¢)¤¦", "¤¢¤¤¤¨¤¦", 0, 8);
+  x2("¤¢?|¤¤", "¤¢", 0, 2);
+  x2("¤¢?|¤¤", "¤¤", 0, 0);
+  x2("¤¢?|¤¤", "", 0, 0);
+  x2("¤¢*|¤¤", "¤¢¤¢", 0, 4);
+  x2("¤¢*|¤¤*", "¤¤¤¢", 0, 0);
+  x2("¤¢*|¤¤*", "¤¢¤¤", 0, 2);
+  x2("[a¤¢]*|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 3);
+  x2("¤¢+|¤¤*", "", 0, 0);
+  x2("¤¢+|¤¤*", "¤¤¤¤¤¤", 0, 6);
+  x2("¤¢+|¤¤*", "¤¢¤¤¤¤¤¤", 0, 2);
+  x2("¤¢+|¤¤*", "a¤¢¤¤¤¤¤¤", 0, 0);
+  n("¤¢+|¤¤+", "");
+  x2("(¤¢|¤¤)?", "¤¤", 0, 2);
+  x2("(¤¢|¤¤)*", "¤¤¤¢", 0, 4);
+  x2("(¤¢|¤¤)+", "¤¤¤¢¤¤", 0, 6);
+  x2("(¤¢¤¤|¤¦¤¢)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 0, 8);
+  x2("(¤¢¤¤|¤¦¤¨)+", "¤¦¤¢¤¢¤¤¤¦¤¨", 4, 12);
+  x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¢¤¤¤¦¤¢", 2, 10);
+  x2("(¤¢¤¤|¤¦¤¢)+", "¤¢¤¤¤ò¤¦¤¢", 0, 4);
+  x2("(¤¢¤¤|¤¦¤¢)+", "$$zzzz¤¢¤¤¤ò¤¦¤¢", 6, 10);
+  x2("(¤¢|¤¤¤¢¤¤)+", "¤¢¤¤¤¢¤¤¤¢", 0, 10);
+  x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢", 2, 4);
+  x2("(¤¢|¤¤¤¢¤¤)+", "¤¤¤¢¤¢¤¢¤¤¤¢", 2, 8);
+  x2("(?:¤¢|¤¤)(?:¤¢|¤¤)", "¤¢¤¤", 0, 4);
+  x2("(?:¤¢*|¤¤*)(?:¤¢*|¤¤*)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 6);
+  x2("(?:¤¢*|¤¤*)(?:¤¢+|¤¤+)", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12);
+  x2("(?:¤¢+|¤¤+){2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12);
+  x2("(?:¤¢+|¤¤+){1,2}", "¤¢¤¢¤¢¤¤¤¤¤¤", 0, 12);
+  x2("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¦¤¦", 0, 4);
+  n("(?:¤¢+|\\A¤¤*)¤¦¤¦", "¤¢¤¤¤¦¤¦");
+  x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¢¤¤¤¦", 12, 16);
+  x2("(?:^¤¢+|¤¤+)*¤¦", "¤¢¤¢¤¤¤¤¤¤¤¤¤¦", 0, 14);
+  x2("¤¦{0,}", "¤¦¤¦¤¦¤¦", 0, 8);
+  x2("¤¢|(?i)c", "C", 0, 1);
+  x2("(?i)c|¤¢", "C", 0, 1);
+  x2("(?i:¤¢)|a", "a", 0, 1);
+  n("(?i:¤¢)|a", "A");
+  x2("[¤¢¤¤¤¦]?", "¤¢¤¤¤¦", 0, 2);
+  x2("[¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 6);
+  x2("[^¤¢¤¤¤¦]*", "¤¢¤¤¤¦", 0, 0);
+  n("[^¤¢¤¤¤¦]+", "¤¢¤¤¤¦");
+  x2("¤¢?\?", "¤¢¤¢¤¢", 0, 0);
+  x2("¤¤¤¢?\?¤¤", "¤¤¤¢¤¤", 0, 6);
+  x2("¤¢*?", "¤¢¤¢¤¢", 0, 0);
+  x2("¤¤¤¢*?", "¤¤¤¢¤¢", 0, 2);
+  x2("¤¤¤¢*?¤¤", "¤¤¤¢¤¢¤¤", 0, 8);
+  x2("¤¢+?", "¤¢¤¢¤¢", 0, 2);
+  x2("¤¤¤¢+?", "¤¤¤¢¤¢", 0, 4);
+  x2("¤¤¤¢+?¤¤", "¤¤¤¢¤¢¤¤", 0, 8);
+  x2("(?:ŷ?)?\?", "ŷ", 0, 0);
+  x2("(?:ŷ?\?)?", "ŷ", 0, 0);
+  x2("(?:̴?)+?", "̴̴̴", 0, 2);
+  x2("(?:É÷+)?\?", "É÷É÷É÷", 0, 0);
+  x2("(?:Àã+)?\?Áú", "ÀãÀãÀãÁú", 0, 8);
+  x2("(?:¤¢¤¤)?{2}", "", 0, 0);
+  x2("(?:µ´¼Ö)?{2}", "µ´¼Öµ´¼Öµ´", 0, 8);
+  x2("(?:µ´¼Ö)*{0}", "µ´¼Öµ´¼Öµ´", 0, 0);
+  x2("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16);
+  n("(?:µ´¼Ö){3,}", "µ´¼Öµ´¼Ö");
+  x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Ö", 0, 12);
+  x2("(?:µ´¼Ö){2,4}", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 16);
+  x2("(?:µ´¼Ö){2,4}?", "µ´¼Öµ´¼Öµ´¼Öµ´¼Öµ´¼Ö", 0, 8);
+  x2("(?:µ´¼Ö){,}", "µ´¼Ö{,}", 0, 7);
+  x2("(?:¤«¤­¤¯)+?{2}", "¤«¤­¤¯¤«¤­¤¯¤«¤­¤¯", 0, 12);
+  x3("(²Ð)", "²Ð", 0, 2, 1);
+  x3("(²Ð¿å)", "²Ð¿å", 0, 4, 1);
+  x2("((»þ´Ö))", "»þ´Ö", 0, 4);
+  x3("((É÷¿å))", "É÷¿å", 0, 4, 1);
+  x3("((ºòÆü))", "ºòÆü", 0, 4, 2);
+  x3("((((((((((((((((((((ÎÌ»Ò))))))))))))))))))))", "ÎÌ»Ò", 0, 4, 20);
+  x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 0, 4, 1);
+  x3("(¤¢¤¤)(¤¦¤¨)", "¤¢¤¤¤¦¤¨", 4, 8, 2);
+  x3("()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³", "¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³", 6, 12, 3);
+  x3("(()(¤¢)¤¤¤¦(¤¨¤ª¤«)¤­¤¯¤±¤³)", "¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³", 6, 12, 4);
+  x3(".*(¥Õ¥©)¥ó¡¦¥Þ(¥ó()¥·¥å¥¿)¥¤¥ó", "¥Õ¥©¥ó¡¦¥Þ¥ó¥·¥å¥¿¥¤¥ó", 10, 18, 2);
+  x2("(^¤¢)", "¤¢", 0, 2);
+  x3("(¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 1);
+  x3("(^¤¢)|(¤¢)", "¤¤¤¢", 2, 4, 2);
+  x3("(¤¢?)", "¤¢¤¢¤¢", 0, 2, 1);
+  x3("(¤Þ*)", "¤Þ¤Þ¤Þ", 0, 6, 1);
+  x3("(¤È*)", "", 0, 0, 1);
+  x3("(¤ë+)", "¤ë¤ë¤ë¤ë¤ë¤ë¤ë", 0, 14, 1);
+  x3("(¤Õ+|¤Ø*)", "¤Õ¤Õ¤Õ¤Ø¤Ø", 0, 6, 1);
+  x3("(¤¢+|¤¤?)", "¤¤¤¤¤¤¤¢¤¢", 0, 2, 1);
+  x3("(¤¢¤¤¤¦)?", "¤¢¤¤¤¦", 0, 6, 1);
+  x3("(¤¢¤¤¤¦)*", "¤¢¤¤¤¦", 0, 6, 1);
+  x3("(¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1);
+  x3("(¤µ¤·¤¹|¤¢¤¤¤¦)+", "¤¢¤¤¤¦", 0, 6, 1);
+  x3("([¤Ê¤Ë¤Ì][¤«¤­¤¯]|¤«¤­¤¯)+", "¤«¤­¤¯", 0, 6, 1);
+  x3("((?i:¤¢¤¤¤¦))", "¤¢¤¤¤¦", 0, 6, 1);
+  x3("((?m:¤¢.¤¦))", "¤¢\n¤¦", 0, 5, 1);
+  x3("((?=¤¢¤ó)¤¢)", "¤¢¤ó¤¤", 0, 2, 1);
+  x3("¤¢¤¤¤¦|(.¤¢¤¤¤¨)", "¤ó¤¢¤¤¤¨", 0, 8, 1);
+  x3("¤¢*(.)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1);
+  x3("¤¢*?(.)", "¤¢¤¢¤¢¤¢¤ó", 0, 2, 1);
+  x3("¤¢*?(¤ó)", "¤¢¤¢¤¢¤¢¤ó", 8, 10, 1);
+  x3("[¤¤¤¦¤¨]¤¢*(.)", "¤¨¤¢¤¢¤¢¤¢¤ó", 10, 12, 1);
+  x3("(\\A¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1);
+  n("(\\A¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦");
+  x3("(^¤¤¤¤)¤¦¤¦", "¤¤¤¤¤¦¤¦", 0, 4, 1);
+  n("(^¤¤¤¤)¤¦¤¦", "¤ó¤¤¤¤¤¦¤¦");
+  x3("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë", 4, 8, 1);
+  n("¤í¤í(¤ë¤ë$)", "¤í¤í¤ë¤ë¤ë");
+  x2("(̵)\\1", "̵̵", 0, 4);
+  n("(̵)\\1", "̵Éð");
+  x2("(¶õ?)\\1", "¶õ¶õ", 0, 4);
+  x2("(¶õ?\?)\\1", "¶õ¶õ", 0, 0);
+  x2("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 8);
+  x3("(¶õ*)\\1", "¶õ¶õ¶õ¶õ¶õ", 0, 4, 1);
+  x2("¤¢(¤¤*)\\1", "¤¢¤¤¤¤¤¤¤¤", 0, 10);
+  x2("¤¢(¤¤*)\\1", "¤¢¤¤", 0, 2);
+  x2("(¤¢*)(¤¤*)\\1\\2", "¤¢¤¢¤¢¤¤¤¤¤¢¤¢¤¢¤¤¤¤", 0, 20);
+  x2("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 0, 14);
+  x3("(¤¢*)(¤¤*)\\2", "¤¢¤¢¤¢¤¤¤¤¤¤¤¤", 6, 10, 2);
+  x2("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 16);
+  x3("(((((((¤Ý*)¤Ú))))))¤Ô\\7", "¤Ý¤Ý¤Ý¤Ú¤Ô¤Ý¤Ý¤Ý", 0, 6, 7);
+  x2("(¤Ï)(¤Ò)(¤Õ)\\2\\1\\3", "¤Ï¤Ò¤Õ¤Ò¤Ï¤Õ", 0, 12);
+  x2("([¤­-¤±])\\1", "¤¯¤¯", 0, 4);
+  x2("(\\w\\d\\s)\\1", "¤¢5 ¤¢5 ", 0, 8);
+  n("(\\w\\d\\s)\\1", "¤¢5 ¤¢5");
+  x2("(ï¡©|[¤¢-¤¦]{3})\\1", "ï¡©", 0, 8);
+  x2("...(ï¡©|[¤¢-¤¦]{3})\\1", "¤¢a¤¢Ã¯¡©Ã¯¡©", 0, 13);
+  x2("(ï¡©|[¤¢-¤¦]{3})\\1", "¤¦¤¤¤¦¤¦¤¤¤¦", 0, 12);
+  x2("(^¤³)\\1", "¤³¤³", 0, 4);
+  n("(^¤à)\\1", "¤á¤à¤à");
+  n("(¤¢$)\\1", "¤¢¤¢");
+  n("(¤¢¤¤\\Z)\\1", "¤¢¤¤");
+  x2("(¤¢*\\Z)\\1", "¤¢", 2, 2);
+  x2(".(¤¢*\\Z)\\1", "¤¤¤¢", 2, 4);
+  x3("(.(¤ä¤¤¤æ)\\2)", "z¤ä¤¤¤æ¤ä¤¤¤æ", 0, 13, 1);
+  x3("(.(..\\d.)\\2)", "¤¢12341234", 0, 10, 1);
+  x2("((?i:¤¢v¤º))\\1", "¤¢v¤º¤¢v¤º", 0, 10);
+  x2("(?<¶ò¤«>ÊÑ|\\(\\g<¶ò¤«>\\))", "((((((ÊÑ))))))", 0, 14);
+  x2("\\A(?:\\g<°¤_1>|\\g<±¾_2>|\\z½ªÎ»  (?<°¤_1>´Ñ|¼«\\g<±¾_2>¼«)(?<±¾_2>ºß|Êî»§\\g<°¤_1>Êî»§))$", "Êî»§¼«Êî»§¼«ºß¼«Êî»§¼«Êî»§", 0, 26);
+  x2("[[¤Ò¤Õ]]", "¤Õ", 0, 2);
+  x2("[[¤¤¤ª¤¦]¤«]", "¤«", 0, 2);
+  n("[[^¤¢]]", "¤¢");
+  n("[^[¤¢]]", "¤¢");
+  x2("[^[^¤¢]]", "¤¢", 0, 2);
+  x2("[[¤«¤­¤¯]&&¤­¤¯]", "¤¯", 0, 2);
+  n("[[¤«¤­¤¯]&&¤­¤¯]", "¤«");
+  n("[[¤«¤­¤¯]&&¤­¤¯]", "¤±");
+  x2("[¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ", 0, 2);
+  n("[^¤¢-¤ó&&¤¤-¤ò&&¤¦-¤ñ]", "¤ñ");
+  x2("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¤", 0, 2);
+  n("[[^¤¢&&¤¢]&&¤¢-¤ó]", "¤¢");
+  x2("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤­", 0, 2);
+  n("[[^¤¢-¤ó&&¤¤¤¦¤¨¤ª]&&[^¤¦-¤«]]", "¤¤");
+  x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¦", 0, 2);
+  x2("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤¨", 0, 2);
+  n("[^[^¤¢¤¤¤¦]&&[^¤¦¤¨¤ª]]", "¤«");
+  x2("[¤¢-&&-¤¢]", "-", 0, 1);
+  x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]q-w]", "¤¨", 0, 2);
+  x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "f", 0, 1);
+  x2("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "g", 0, 1);
+  n("[^[^a-z¤¢¤¤¤¦]&&[^bcdefg¤¦¤¨¤ª]g-w]", "2");
+  x2("a<b>¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É<\\/b>", "a<b>¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É</b>", 0, 32);
+  x2(".<b>¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É<\\/b>", "a<b>¥Ð¡¼¥¸¥ç¥ó¤Î¥À¥¦¥ó¥í¡¼¥É</b>", 0, 32);
+  fprintf(stdout,
+       "\nRESULT   SUCC: %d,  FAIL: %d,  ERROR: %d      (by Oniguruma %s)\n",
+       nsucc, nfail, nerror, onig_version());
+
+#ifndef POSIX_TEST
+  onig_region_free(region, 1);
+  onig_end();
+#endif
+
+  return ((nfail == 0 && nerror == 0) ? 0 : -1);
+}
diff --git a/ext/mbstring/oniguruma/testu.c b/ext/mbstring/oniguruma/testu.c
new file mode 100644 (file)
index 0000000..5652988
--- /dev/null
@@ -0,0 +1,911 @@
+/*
+ * This program was generated by testconv.rb.
+ */
+#include<stdio.h>
+
+#ifdef POSIX_TEST
+#include "onigposix.h"
+#else
+#include "oniguruma.h"
+#endif
+
+static int nsucc  = 0;  /* cases whose outcome matched the expectation (incremented in xx) */
+static int nfail  = 0;  /* cases whose outcome differed from the expectation (incremented in xx) */
+static int nerror = 0;  /* cases where compiling or searching reported an error (incremented in xx) */
+
+static FILE* err_file;  /* stream that receives the "ERROR: ..." lines; main() points it at stdout */
+
+#ifndef POSIX_TEST
+static OnigRegion* region;  /* shared match region: allocated in main(), passed to onig_search() in xx() */
+static OnigEncoding ENC;    /* encoding used for patterns and subjects; main() sets ONIG_ENCODING_UTF16_BE */
+#endif
+/*
+ * ulen(p): byte length of p as reported by onigenc_str_bytelen_null()
+ * for the encoding ENC.
+ * NOTE(review): ENC is declared only when POSIX_TEST is undefined, but xx()
+ * also expands ulen() inside its POSIX_TEST branch - confirm that the
+ * POSIX_TEST build of this file actually compiles.
+ */
+#define ulen(p) onigenc_str_bytelen_null(ENC, (UChar* )p)
+
+/*
+ * uconv: write a printable, NUL-terminated rendering of `from` into `to`.
+ *
+ * The input is consumed two bytes at a time for `len` bytes:
+ *   - first byte zero, second byte in 0x20..0x7e and neither backslash
+ *     (0x5c) nor double quote (0x22): the second byte is copied as is;
+ *   - first byte zero, any other second byte: a "\ooo" octal escape of
+ *     the second byte is written;
+ *   - first byte non-zero: both bytes are written as "\ooo" escapes.
+ *
+ * No bounds checking is done on `to`; each input pair can produce up to
+ * eight output characters plus the final NUL.
+ */
+static void uconv(char* from, char* to, int len)
+{
+  char* out = to;
+  int pos = 0;
+
+  while (pos < len) {
+    unsigned char hi = (unsigned char )from[pos];
+    unsigned char lo = (unsigned char )from[pos + 1];
+
+    if (hi != 0) {
+      /* not a plain one-byte value: dump both bytes as octal escapes */
+      out += sprintf(out, "\\%03o", hi);
+      out += sprintf(out, "\\%03o", lo);
+    }
+    else if (lo >= 0x20 && lo < 0x7f && lo != 0x5c && lo != 0x22) {
+      /* printable ASCII that needs no escaping */
+      out += sprintf(out, "%c", lo);
+    }
+    else {
+      /* control byte, DEL or above, backslash or double quote */
+      out += sprintf(out, "\\%03o", lo);
+    }
+
+    pos += 2;
+  }
+
+  *out = 0;
+}
+
+/*
+ * xx: run one test case and update the global result counters.
+ *
+ *   pattern, str - regex source and subject text; their byte lengths are
+ *                  obtained with ulen()
+ *   from, to     - expected start/end offsets of match entry `mem`
+ *   mem          - index into pmatch[] (POSIX_TEST) or into region->beg[] /
+ *                  region->end[] of the entry that is compared; it is not
+ *                  range-checked here
+ *   not          - non-zero when the pattern is expected NOT to match
+ *
+ * A compile or search error is printed to err_file and counted in nerror;
+ * every other outcome is printed to stdout and counted in nsucc or nfail.
+ * cpat/cstr receive uconv()-escaped copies of the inputs and are used only
+ * in the messages; uconv() does no bounds checking, so the 4000-byte
+ * buffers assume short test strings.
+ *
+ * The compiled regex is released on every path that is reached after a
+ * successful compile, including the search-error returns.
+ */
+static void xx(char* pattern, char* str, int from, int to, int mem, int not)
+{
+  int r;
+  char cpat[4000], cstr[4000];
+
+#ifdef POSIX_TEST
+  regex_t reg;
+  char buf[200];
+  /* NOTE(review): enlarged from 20 entries so that index 20 is addressable;
+     confirm that no test case in this file needs a larger group index. */
+  regmatch_t pmatch[25];
+  size_t nmatch;
+
+  uconv(pattern, cpat, ulen(pattern));
+  uconv(str,     cstr, ulen(str));
+
+  r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
+  if (r) {
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    return ;
+  }
+
+  /* never announce more slots to regexec() than pmatch really has */
+  nmatch = reg.re_nsub + 1;
+  if (nmatch > sizeof(pmatch) / sizeof(pmatch[0]))
+    nmatch = sizeof(pmatch) / sizeof(pmatch[0]);
+
+  r = regexec(&reg, str, nmatch, pmatch, 0);
+  if (r != 0 && r != REG_NOMATCH) {
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    regfree(&reg);  /* the pattern compiled, so it must be released here too */
+    return ;
+  }
+
+  if (r == REG_NOMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr);
+      nfail++;
+    }
+    else {
+      if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr,
+               from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
+        nfail++;
+      }
+    }
+  }
+  regfree(&reg);
+
+#else
+  regex_t* reg;
+  OnigCompileInfo ci;
+  OnigErrorInfo einfo;
+
+  uconv(pattern, cpat, ulen(pattern));
+  uconv(str,     cstr, ulen(str));
+
+#if 0
+  r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)),
+              ONIG_OPTION_DEFAULT, ENC, ONIG_SYNTAX_DEFAULT, &einfo);
+#else
+  ci.num_of_elements = 5;
+  ci.pattern_enc = ENC;
+  ci.target_enc  = ENC;
+  ci.syntax      = ONIG_SYNTAX_DEFAULT;
+  ci.option      = ONIG_OPTION_DEFAULT;
+  ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT;
+
+  r = onig_new_deluxe(&reg, (UChar* )pattern,
+          (UChar* )(pattern + ulen(pattern)),
+          &ci, &einfo);
+#endif
+
+  if (r) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r, &einfo);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    return ;
+  }
+
+  r = onig_search(reg, (UChar* )str, (UChar* )(str + ulen(str)),
+                 (UChar* )str, (UChar* )(str + ulen(str)),
+                 region, ONIG_OPTION_NONE);
+  if (r < ONIG_MISMATCH) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    onig_free(reg);  /* the pattern compiled, so it must be released here too */
+    return ;
+  }
+
+  if (r == ONIG_MISMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", cpat, cstr);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", cpat, cstr);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", cpat, cstr);
+      nfail++;
+    }
+    else {
+      if (region->beg[mem] == from && region->end[mem] == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", cpat, cstr);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", cpat, cstr,
+               from, to, region->beg[mem], region->end[mem]);
+        nfail++;
+      }
+    }
+  }
+  onig_free(reg);
+#endif
+}
+
+/*
+ * x2: expect `pattern` to match `str`, with match entry 0 spanning
+ * from..to.  Delegates to xx().
+ */
+static void x2(char* pattern, char* str, int from, int to)
+{
+  const int entry  = 0;  /* compare match entry 0 */
+  const int negate = 0;  /* a match is the expected outcome */
+
+  xx(pattern, str, from, to, entry, negate);
+}
+
+/*
+ * x3: expect `pattern` to match `str`, with match entry `mem` spanning
+ * from..to.  Delegates to xx().
+ */
+static void x3(char* pattern, char* str, int from, int to, int mem)
+{
+  const int negate = 0;  /* a match is the expected outcome */
+
+  xx(pattern, str, from, to, mem, negate);
+}
+
+/*
+ * n: expect `pattern` NOT to match `str`.  Delegates to xx() with zero
+ * offsets and entry index, and the `not` flag set.
+ */
+static void n(char* pattern, char* str)
+{
+  const int unused = 0;  /* from, to and mem are all passed as 0 */
+  const int negate = 1;  /* a mismatch is the expected outcome */
+
+  xx(pattern, str, unused, unused, unused, negate);
+}
+
+extern int main(int argc, char* argv[])
+{
+  err_file = stdout;
+
+#ifndef POSIX_TEST
+  region = onig_region_new();
+#endif
+#ifdef POSIX_TEST
+  reg_set_encoding(REG_POSIX_ENCODING_UTF16_BE);
+#else
+  ENC = ONIG_ENCODING_UTF16_BE;
+#endif
+  x2("\000\000", "\000\000", 0, 0);
+  x2("\000^\000\000", "\000\000", 0, 0);
+  x2("\000$\000\000", "\000\000", 0, 0);
+  x2("\000\134\000G\000\000", "\000\000", 0, 0);
+  x2("\000\134\000A\000\000", "\000\000", 0, 0);
+  x2("\000\134\000Z\000\000", "\000\000", 0, 0);
+  x2("\000\134\000z\000\000", "\000\000", 0, 0);
+  x2("\000^\000$\000\000", "\000\000", 0, 0);
+  x2("\000\134\000c\000a\000\000", "\000\001\000\000", 0, 2);
+  x2("\000\134\000C\000-\000b\000\000", "\000\002\000\000", 0, 2);
+  x2("\000\134\000c\000\134\000\134\000\000", "\000\034\000\000", 0, 2);
+  x2("\000q\000[\000\134\000c\000\134\000\134\000]\000\000", "\000q\000\034\000\000", 0, 4);
+  x2("\000\000", "\000a\000\000", 0, 0);
+  x2("\000a\000\000", "\000a\000\000", 0, 2);
+  x2("\000\134\000x\0000\0000\000\134\000x\0006\0001\000\000", "\000a\000\000", 0, 2);
+  x2("\000a\000a\000\000", "\000a\000a\000\000", 0, 4);
+  x2("\000a\000a\000a\000\000", "\000a\000a\000a\000\000", 0, 6);
+  x2("\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", "\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 70);
+  x2("\000a\000b\000\000", "\000a\000b\000\000", 0, 4);
+  x2("\000b\000\000", "\000a\000b\000\000", 2, 4);
+  x2("\000b\000c\000\000", "\000a\000b\000c\000\000", 2, 6);
+  x2("\000(\000?\000i\000:\000#\000R\000E\000T\000#\000)\000\000", "\000#\000I\000N\000S\000#\000#\000R\000E\000T\000#\000\000", 10, 20);
+  x2("\000\134\0000\0000\0000\000\134\0001\0007\000\000", "\000\017\000\000", 0, 2);
+  x2("\000\134\000x\0000\0000\000\134\000x\0001\000f\000\000", "\000\037\000\000", 0, 2);
+  x2("\000a\000(\000?\000#\000.\000.\000.\000.\000\134\000\134\000J\000J\000J\000J\000)\000b\000\000", "\000a\000b\000\000", 0, 4);
+  x2("\000(\000?\000x\000)\000 \000 \000G\000 \000(\000o\000 \000O\000(\000?\000-\000x\000)\000o\000O\000)\000 \000g\000 \000L\000\000", "\000G\000o\000O\000o\000O\000g\000L\000e\000\000", 0, 14);
+  x2("\000.\000\000", "\000a\000\000", 0, 2);
+  n("\000.\000\000", "\000\000");
+  x2("\000.\000.\000\000", "\000a\000b\000\000", 0, 4);
+  x2("\000\134\000w\000\000", "\000e\000\000", 0, 2);
+  n("\000\134\000W\000\000", "\000e\000\000");
+  x2("\000\134\000s\000\000", "\000 \000\000", 0, 2);
+  x2("\000\134\000S\000\000", "\000b\000\000", 0, 2);
+  x2("\000\134\000d\000\000", "\0004\000\000", 0, 2);
+  n("\000\134\000D\000\000", "\0004\000\000");
+  x2("\000\134\000b\000\000", "\000z\000 \000\000", 0, 0);
+  x2("\000\134\000b\000\000", "\000 \000z\000\000", 2, 2);
+  x2("\000\134\000B\000\000", "\000z\000z\000 \000\000", 2, 2);
+  x2("\000\134\000B\000\000", "\000z\000 \000\000", 4, 4);
+  x2("\000\134\000B\000\000", "\000 \000z\000\000", 0, 0);
+  x2("\000[\000a\000b\000]\000\000", "\000b\000\000", 0, 2);
+  n("\000[\000a\000b\000]\000\000", "\000c\000\000");
+  x2("\000[\000a\000-\000z\000]\000\000", "\000t\000\000", 0, 2);
+  n("\000[\000^\000a\000]\000\000", "\000a\000\000");
+  x2("\000[\000^\000a\000]\000\000", "\000\012\000\000", 0, 2);
+  x2("\000[\000]\000]\000\000", "\000]\000\000", 0, 2);
+  n("\000[\000^\000]\000]\000\000", "\000]\000\000");
+  x2("\000[\000\134\000^\000]\000+\000\000", "\0000\000^\000^\0001\000\000", 2, 6);
+  x2("\000[\000b\000-\000]\000\000", "\000b\000\000", 0, 2);
+  x2("\000[\000b\000-\000]\000\000", "\000-\000\000", 0, 2);
+  x2("\000[\000\134\000w\000]\000\000", "\000z\000\000", 0, 2);
+  n("\000[\000\134\000w\000]\000\000", "\000 \000\000");
+  x2("\000[\000\134\000W\000]\000\000", "\000b\000$\000\000", 2, 4);
+  x2("\000[\000\134\000d\000]\000\000", "\0005\000\000", 0, 2);
+  n("\000[\000\134\000d\000]\000\000", "\000e\000\000");
+  x2("\000[\000\134\000D\000]\000\000", "\000t\000\000", 0, 2);
+  n("\000[\000\134\000D\000]\000\000", "\0003\000\000");
+  x2("\000[\000\134\000s\000]\000\000", "\000 \000\000", 0, 2);
+  n("\000[\000\134\000s\000]\000\000", "\000a\000\000");
+  x2("\000[\000\134\000S\000]\000\000", "\000b\000\000", 0, 2);
+  n("\000[\000\134\000S\000]\000\000", "\000 \000\000");
+  x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\0002\000\000", 0, 2);
+  n("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000\000");
+  x2("\000[\000[\000:\000u\000p\000p\000e\000r\000:\000]\000]\000\000", "\000B\000\000", 0, 2);
+  x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000+\000\000", 0, 2);
+  x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000G\000H\000I\000K\000K\000-\0009\000+\000*\000\000", 12, 14);
+  x2("\000[\000*\000[\000:\000x\000d\000i\000g\000i\000t\000:\000]\000+\000]\000\000", "\000-\000@\000^\000+\000\000", 6, 8);
+  n("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000A\000\000");
+  x2("\000[\000[\000:\000u\000p\000p\000e\000r\000]\000]\000\000", "\000:\000\000", 0, 2);
+  x2("\000[\000\134\0000\0000\0000\000\134\0000\0004\0004\000-\000\134\0000\0000\0000\000\134\0000\0004\0007\000]\000\000", "\000&\000\000", 0, 2);
+  x2("\000[\000\134\000x\0000\0000\000\134\000x\0005\000a\000-\000\134\000x\0000\0000\000\134\000x\0005\000c\000]\000\000", "\000[\000\000", 0, 2);
+  x2("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000l\000\000", 0, 2);
+  n("\000[\000\134\000x\0000\0000\000\134\000x\0006\000A\000-\000\134\000x\0000\0000\000\134\000x\0006\000D\000]\000\000", "\000n\000\000");
+  n("\000^\000[\0000\000-\0009\000A\000-\000F\000]\000+\000 \0000\000+\000 \000U\000N\000D\000E\000F\000 \000\000", "\0007\0005\000F\000 \0000\0000\0000\0000\0000\0000\0000\0000\000 \000S\000E\000C\000T\0001\0004\000A\000 \000n\000o\000t\000y\000p\000e\000 \000(\000)\000 \000 \000 \000 \000E\000x\000t\000e\000r\000n\000a\000l\000 \000 \000 \000 \000|\000 \000_\000r\000b\000_\000a\000p\000p\000l\000y\000\000");
+  x2("\000[\000\134\000[\000]\000\000", "\000[\000\000", 0, 2);
+  x2("\000[\000\134\000]\000]\000\000", "\000]\000\000", 0, 2);
+  x2("\000[\000&\000]\000\000", "\000&\000\000", 0, 2);
+  x2("\000[\000[\000a\000b\000]\000]\000\000", "\000b\000\000", 0, 2);
+  x2("\000[\000[\000a\000b\000]\000c\000]\000\000", "\000c\000\000", 0, 2);
+  n("\000[\000[\000^\000a\000]\000]\000\000", "\000a\000\000");
+  n("\000[\000^\000[\000a\000]\000]\000\000", "\000a\000\000");
+  x2("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000b\000\000", 0, 2);
+  n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000a\000\000");
+  n("\000[\000[\000a\000b\000]\000&\000&\000b\000c\000]\000\000", "\000c\000\000");
+  x2("\000[\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000", 0, 2);
+  n("\000[\000^\000a\000-\000z\000&\000&\000b\000-\000y\000&\000&\000c\000-\000x\000]\000\000", "\000w\000\000");
+  x2("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000b\000\000", 0, 2);
+  n("\000[\000[\000^\000a\000&\000&\000a\000]\000&\000&\000a\000-\000z\000]\000\000", "\000a\000\000");
+  x2("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000h\000\000", 0, 2);
+  n("\000[\000[\000^\000a\000-\000z\000&\000&\000b\000c\000d\000e\000f\000]\000&\000&\000[\000^\000c\000-\000g\000]\000]\000\000", "\000c\000\000");
+  x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000c\000\000", 0, 2);
+  x2("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000e\000\000", 0, 2);
+  n("\000[\000^\000[\000^\000a\000b\000c\000]\000&\000&\000[\000^\000c\000d\000e\000]\000]\000\000", "\000f\000\000");
+  x2("\000[\000a\000-\000&\000&\000-\000a\000]\000\000", "\000-\000\000", 0, 2);
+  n("\000[\000a\000\134\000-\000&\000&\000\134\000-\000a\000]\000\000", "\000&\000\000");
+  n("\000\134\000w\000a\000b\000c\000\000", "\000 \000a\000b\000c\000\000");
+  x2("\000a\000\134\000W\000b\000c\000\000", "\000a\000 \000b\000c\000\000", 0, 8);
+  x2("\000a\000.\000b\000.\000c\000\000", "\000a\000a\000b\000b\000c\000\000", 0, 10);
+  x2("\000.\000\134\000w\000b\000\134\000W\000.\000.\000c\000\000", "\000a\000b\000b\000 \000b\000c\000c\000\000", 0, 14);
+  x2("\000\134\000s\000\134\000w\000z\000z\000z\000\000", "\000 \000z\000z\000z\000z\000\000", 0, 10);
+  x2("\000a\000a\000.\000b\000\000", "\000a\000a\000b\000b\000\000", 0, 8);
+  n("\000.\000a\000\000", "\000a\000b\000\000");
+  x2("\000.\000a\000\000", "\000a\000a\000\000", 0, 4);
+  x2("\000^\000a\000\000", "\000a\000\000", 0, 2);
+  x2("\000^\000a\000$\000\000", "\000a\000\000", 0, 2);
+  x2("\000^\000\134\000w\000$\000\000", "\000a\000\000", 0, 2);
+  n("\000^\000\134\000w\000$\000\000", "\000 \000\000");
+  x2("\000^\000\134\000w\000a\000b\000$\000\000", "\000z\000a\000b\000\000", 0, 6);
+  x2("\000^\000\134\000w\000a\000b\000c\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14);
+  x2("\000^\000\134\000w\000.\000.\000.\000d\000e\000f\000$\000\000", "\000z\000a\000b\000c\000d\000e\000f\000\000", 0, 14);
+  x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W\000a\000a\000a\000\134\000d\000\000", "\000a\000a\000 \000 \000a\000a\000a\0004\000\000", 0, 16);
+  x2("\000\134\000A\000\134\000Z\000\000", "\000\000", 0, 0);
+  x2("\000\134\000A\000x\000y\000z\000\000", "\000x\000y\000z\000\000", 0, 6);
+  x2("\000x\000y\000z\000\134\000Z\000\000", "\000x\000y\000z\000\000", 0, 6);
+  x2("\000x\000y\000z\000\134\000z\000\000", "\000x\000y\000z\000\000", 0, 6);
+  x2("\000a\000\134\000Z\000\000", "\000a\000\000", 0, 2);
+  x2("\000\134\000G\000a\000z\000\000", "\000a\000z\000\000", 0, 4);
+  n("\000\134\000G\000z\000\000", "\000b\000z\000a\000\000");
+  n("\000a\000z\000\134\000G\000\000", "\000a\000z\000\000");
+  n("\000a\000z\000\134\000A\000\000", "\000a\000z\000\000");
+  n("\000a\000\134\000A\000z\000\000", "\000a\000z\000\000");
+  x2("\000\134\000^\000\134\000$\000\000", "\000^\000$\000\000", 0, 4);
+  x2("\000^\000x\000?\000y\000\000", "\000x\000y\000\000", 0, 4);
+  x2("\000^\000(\000x\000?\000y\000)\000\000", "\000x\000y\000\000", 0, 4);
+  x2("\000\134\000w\000\000", "\000_\000\000", 0, 2);
+  n("\000\134\000W\000\000", "\000_\000\000");
+  x2("\000(\000?\000=\000z\000)\000z\000\000", "\000z\000\000", 0, 2);
+  n("\000(\000?\000=\000z\000)\000.\000\000", "\000a\000\000");
+  x2("\000(\000?\000!\000z\000)\000a\000\000", "\000a\000\000", 0, 2);
+  n("\000(\000?\000!\000z\000)\000a\000\000", "\000z\000\000");
+  x2("\000(\000?\000i\000:\000a\000)\000\000", "\000a\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000a\000)\000\000", "\000A\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000A\000)\000\000", "\000a\000\000", 0, 2);
+  n("\000(\000?\000i\000:\000A\000)\000\000", "\000b\000\000");
+  x2("\000(\000?\000i\000:\000[\000A\000-\000Z\000]\000)\000\000", "\000a\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000H\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000h\000\000", 0, 2);
+  n("\000(\000?\000i\000:\000[\000f\000-\000m\000]\000)\000\000", "\000e\000\000");
+  x2("\000(\000?\000i\000:\000[\000A\000-\000c\000]\000)\000\000", "\000D\000\000", 0, 2);
+  n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000A\000\000");
+  n("\000(\000?\000i\000:\000[\000^\000a\000-\000z\000]\000)\000\000", "\000a\000\000");
+  x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\000Z\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000[\000!\000-\000k\000]\000)\000\000", "\0007\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000b\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000[\000T\000-\000}\000]\000)\000\000", "\000{\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000\134\000?\000a\000)\000\000", "\000?\000A\000\000", 0, 4);
+  x2("\000(\000?\000i\000:\000\134\000*\000A\000)\000\000", "\000*\000a\000\000", 0, 4);
+  n("\000.\000\000", "\000\012\000\000");
+  x2("\000(\000?\000m\000:\000.\000)\000\000", "\000\012\000\000", 0, 2);
+  x2("\000(\000?\000m\000:\000a\000.\000)\000\000", "\000a\000\012\000\000", 0, 4);
+  x2("\000(\000?\000m\000:\000.\000b\000)\000\000", "\000a\000\012\000b\000\000", 2, 6);
+  x2("\000.\000*\000a\000b\000c\000\000", "\000d\000d\000d\000a\000b\000d\000d\000\012\000d\000d\000a\000b\000c\000\000", 16, 26);
+  x2("\000(\000?\000m\000:\000.\000*\000a\000b\000c\000)\000\000", "\000d\000d\000d\000a\000b\000d\000d\000a\000b\000c\000\000", 0, 20);
+  n("\000(\000?\000i\000)\000(\000?\000-\000i\000)\000a\000\000", "\000A\000\000");
+  n("\000(\000?\000i\000)\000(\000?\000-\000i\000:\000a\000)\000\000", "\000A\000\000");
+  x2("\000a\000?\000\000", "\000\000", 0, 0);
+  x2("\000a\000?\000\000", "\000b\000\000", 0, 0);
+  x2("\000a\000?\000\000", "\000a\000\000", 0, 2);
+  x2("\000a\000*\000\000", "\000\000", 0, 0);
+  x2("\000a\000*\000\000", "\000a\000\000", 0, 2);
+  x2("\000a\000*\000\000", "\000a\000a\000a\000\000", 0, 6);
+  x2("\000a\000*\000\000", "\000b\000a\000a\000a\000a\000\000", 0, 0);
+  n("\000a\000+\000\000", "\000\000");
+  x2("\000a\000+\000\000", "\000a\000\000", 0, 2);
+  x2("\000a\000+\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
+  x2("\000a\000+\000\000", "\000a\000a\000b\000b\000b\000\000", 0, 4);
+  x2("\000a\000+\000\000", "\000b\000a\000a\000a\000a\000\000", 2, 10);
+  x2("\000.\000?\000\000", "\000\000", 0, 0);
+  x2("\000.\000?\000\000", "\000f\000\000", 0, 2);
+  x2("\000.\000?\000\000", "\000\012\000\000", 0, 0);
+  x2("\000.\000*\000\000", "\000\000", 0, 0);
+  x2("\000.\000*\000\000", "\000a\000b\000c\000d\000e\000\000", 0, 10);
+  x2("\000.\000+\000\000", "\000z\000\000", 0, 2);
+  x2("\000.\000+\000\000", "\000z\000d\000s\000w\000e\000r\000\012\000\000", 0, 12);
+  x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000b\000f\000b\000a\000c\000\000", 0, 8);
+  x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14);
+  x2("\000(\000(\000.\000*\000)\000a\000\134\0002\000f\000)\000\000", "\000b\000a\000c\000b\000a\000b\000f\000\000", 6, 14);
+  x2("\000(\000.\000*\000)\000a\000\134\0001\000f\000\000", "\000b\000a\000c\000z\000z\000z\000z\000z\000z\000\012\000b\000a\000z\000z\000\012\000z\000z\000z\000z\000b\000a\000b\000f\000\000", 38, 46);
+  x2("\000a\000|\000b\000\000", "\000a\000\000", 0, 2);
+  x2("\000a\000|\000b\000\000", "\000b\000\000", 0, 2);
+  x2("\000|\000a\000\000", "\000a\000\000", 0, 0);
+  x2("\000(\000|\000a\000)\000\000", "\000a\000\000", 0, 0);
+  x2("\000a\000b\000|\000b\000c\000\000", "\000a\000b\000\000", 0, 4);
+  x2("\000a\000b\000|\000b\000c\000\000", "\000b\000c\000\000", 0, 4);
+  x2("\000z\000(\000?\000:\000a\000b\000|\000b\000c\000)\000\000", "\000z\000b\000c\000\000", 0, 6);
+  x2("\000a\000(\000?\000:\000a\000b\000|\000b\000c\000)\000c\000\000", "\000a\000a\000b\000c\000\000", 0, 8);
+  x2("\000a\000b\000|\000(\000?\000:\000a\000c\000|\000a\000z\000)\000\000", "\000a\000z\000\000", 0, 4);
+  x2("\000a\000|\000b\000|\000c\000\000", "\000d\000c\000\000", 2, 4);
+  x2("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000p\000q\000r\000\000", 0, 4);
+  n("\000a\000|\000b\000|\000c\000d\000|\000e\000f\000g\000|\000h\000|\000i\000j\000k\000|\000l\000m\000n\000|\000o\000|\000p\000q\000|\000r\000s\000t\000u\000v\000w\000x\000|\000y\000z\000\000", "\000m\000n\000\000");
+  x2("\000a\000|\000^\000z\000\000", "\000b\000a\000\000", 2, 4);
+  x2("\000a\000|\000^\000z\000\000", "\000z\000a\000\000", 0, 2);
+  x2("\000a\000|\000\134\000G\000z\000\000", "\000b\000z\000a\000\000", 4, 6);
+  x2("\000a\000|\000\134\000G\000z\000\000", "\000z\000a\000\000", 0, 2);
+  x2("\000a\000|\000\134\000A\000z\000\000", "\000b\000z\000a\000\000", 4, 6);
+  x2("\000a\000|\000\134\000A\000z\000\000", "\000z\000a\000\000", 0, 2);
+  x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000a\000\000", 2, 4);
+  x2("\000a\000|\000b\000\134\000Z\000\000", "\000b\000\000", 0, 2);
+  x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000a\000\000", 2, 4);
+  x2("\000a\000|\000b\000\134\000z\000\000", "\000b\000\000", 0, 2);
+  x2("\000\134\000w\000|\000\134\000s\000\000", "\000 \000\000", 0, 2);
+  n("\000\134\000w\000|\000\134\000w\000\000", "\000 \000\000");
+  x2("\000\134\000w\000|\000%\000\000", "\000%\000\000", 0, 2);
+  x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "\000&\000\000", 0, 2);
+  x2("\000[\000b\000-\000d\000]\000|\000[\000^\000e\000-\000z\000]\000\000", "\000a\000\000", 0, 2);
+  x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000d\000z\000\000", 0, 2);
+  x2("\000(\000?\000:\000a\000|\000[\000c\000-\000f\000]\000)\000|\000b\000z\000\000", "\000b\000z\000\000", 0, 4);
+  x2("\000a\000b\000c\000|\000(\000?\000=\000z\000z\000)\000.\000.\000f\000\000", "\000z\000z\000f\000\000", 0, 6);
+  x2("\000a\000b\000c\000|\000(\000?\000!\000z\000z\000)\000.\000.\000f\000\000", "\000a\000b\000f\000\000", 0, 6);
+  x2("\000(\000?\000=\000z\000a\000)\000.\000.\000a\000|\000(\000?\000=\000z\000z\000)\000.\000.\000a\000\000", "\000z\000z\000a\000\000", 0, 6);
+  n("\000(\000?\000>\000a\000|\000a\000b\000d\000)\000c\000\000", "\000a\000b\000d\000c\000\000");
+  x2("\000(\000?\000>\000a\000b\000d\000|\000a\000)\000c\000\000", "\000a\000b\000d\000c\000\000", 0, 8);
+  x2("\000a\000?\000|\000b\000\000", "\000a\000\000", 0, 2);
+  x2("\000a\000?\000|\000b\000\000", "\000b\000\000", 0, 0);
+  x2("\000a\000?\000|\000b\000\000", "\000\000", 0, 0);
+  x2("\000a\000*\000|\000b\000\000", "\000a\000a\000\000", 0, 4);
+  x2("\000a\000*\000|\000b\000*\000\000", "\000b\000a\000\000", 0, 0);
+  x2("\000a\000*\000|\000b\000*\000\000", "\000a\000b\000\000", 0, 2);
+  x2("\000a\000+\000|\000b\000*\000\000", "\000\000", 0, 0);
+  x2("\000a\000+\000|\000b\000*\000\000", "\000b\000b\000b\000\000", 0, 6);
+  x2("\000a\000+\000|\000b\000*\000\000", "\000a\000b\000b\000b\000\000", 0, 2);
+  n("\000a\000+\000|\000b\000+\000\000", "\000\000");
+  x2("\000(\000a\000|\000b\000)\000?\000\000", "\000b\000\000", 0, 2);
+  x2("\000(\000a\000|\000b\000)\000*\000\000", "\000b\000a\000\000", 0, 4);
+  x2("\000(\000a\000|\000b\000)\000+\000\000", "\000b\000a\000b\000\000", 0, 6);
+  x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000c\000a\000a\000b\000b\000c\000\000", 0, 8);
+  x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000a\000b\000c\000a\000\000", 2, 10);
+  x2("\000(\000a\000b\000|\000c\000a\000)\000+\000\000", "\000a\000b\000z\000c\000a\000\000", 0, 4);
+  x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 10);
+  x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000\000", 2, 4);
+  x2("\000(\000a\000|\000b\000a\000b\000)\000+\000\000", "\000b\000a\000a\000a\000b\000a\000\000", 2, 8);
+  x2("\000(\000?\000:\000a\000|\000b\000)\000(\000?\000:\000a\000|\000b\000)\000\000", "\000a\000b\000\000", 0, 4);
+  x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000*\000|\000b\000*\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 6);
+  x2("\000(\000?\000:\000a\000*\000|\000b\000*\000)\000(\000?\000:\000a\000+\000|\000b\000+\000)\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
+  x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
+  x2("\000h\000{\0000\000,\000}\000\000", "\000h\000h\000h\000h\000\000", 0, 8);
+  x2("\000(\000?\000:\000a\000+\000|\000b\000+\000)\000{\0001\000,\0002\000}\000\000", "\000a\000a\000a\000b\000b\000b\000\000", 0, 12);
+  n("\000a\000x\000{\0002\000}\000*\000a\000\000", "\0000\000a\000x\000x\000x\000a\0001\000\000");
+  n("\000a\000.\000{\0000\000,\0002\000}\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000");
+  n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000a\0000\000\000");
+  n("\000a\000.\000{\0000\000,\0002\000}\000?\000a\000\000", "\0000\000a\000X\000X\000X\000X\000a\0000\000\000");
+  x2("\000^\000a\000{\0002\000,\000}\000?\000a\000$\000\000", "\000a\000a\000a\000\000", 0, 6);
+  x2("\000^\000[\000a\000-\000z\000]\000{\0002\000,\000}\000?\000$\000\000", "\000a\000a\000a\000\000", 0, 6);
+  x2("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000c\000c\000\000", 0, 4);
+  n("\000(\000?\000:\000a\000+\000|\000\134\000A\000b\000*\000)\000c\000c\000\000", "\000a\000b\000c\000c\000\000");
+  x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000a\000b\000c\000\000", 12, 16);
+  x2("\000(\000?\000:\000^\000a\000+\000|\000b\000+\000)\000*\000c\000\000", "\000a\000a\000b\000b\000b\000b\000c\000\000", 0, 14);
+  x2("\000a\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2);
+  x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000C\000\000", 0, 2);
+  x2("\000(\000?\000i\000)\000c\000|\000a\000\000", "\000A\000\000", 0, 2);
+  x2("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000C\000\000", 0, 2);
+  n("\000(\000?\000i\000:\000c\000)\000|\000a\000\000", "\000A\000\000");
+  x2("\000[\000a\000b\000c\000]\000?\000\000", "\000a\000b\000c\000\000", 0, 2);
+  x2("\000[\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 6);
+  x2("\000[\000^\000a\000b\000c\000]\000*\000\000", "\000a\000b\000c\000\000", 0, 0);
+  n("\000[\000^\000a\000b\000c\000]\000+\000\000", "\000a\000b\000c\000\000");
+  x2("\000a\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
+  x2("\000b\000a\000?\000?\000b\000\000", "\000b\000a\000b\000\000", 0, 6);
+  x2("\000a\000*\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
+  x2("\000b\000a\000*\000?\000\000", "\000b\000a\000a\000\000", 0, 2);
+  x2("\000b\000a\000*\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8);
+  x2("\000a\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2);
+  x2("\000b\000a\000+\000?\000\000", "\000b\000a\000a\000\000", 0, 4);
+  x2("\000b\000a\000+\000?\000b\000\000", "\000b\000a\000a\000b\000\000", 0, 8);
+  x2("\000(\000?\000:\000a\000?\000)\000?\000?\000\000", "\000a\000\000", 0, 0);
+  x2("\000(\000?\000:\000a\000?\000?\000)\000?\000\000", "\000a\000\000", 0, 0);
+  x2("\000(\000?\000:\000a\000?\000)\000+\000?\000\000", "\000a\000a\000a\000\000", 0, 2);
+  x2("\000(\000?\000:\000a\000+\000)\000?\000?\000\000", "\000a\000a\000a\000\000", 0, 0);
+  x2("\000(\000?\000:\000a\000+\000)\000?\000?\000b\000\000", "\000a\000a\000a\000b\000\000", 0, 8);
+  x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0);
+  x2("\000(\000?\000:\000a\000b\000)\000?\000{\0002\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 8);
+  x2("\000(\000?\000:\000a\000b\000)\000*\000{\0000\000}\000\000", "\000a\000b\000a\000b\000a\000\000", 0, 0);
+  x2("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16);
+  n("\000(\000?\000:\000a\000b\000)\000{\0003\000,\000}\000\000", "\000a\000b\000a\000b\000\000");
+  x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000\000", 0, 12);
+  x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 16);
+  x2("\000(\000?\000:\000a\000b\000)\000{\0002\000,\0004\000}\000?\000\000", "\000a\000b\000a\000b\000a\000b\000a\000b\000a\000b\000\000", 0, 8);
+  x2("\000(\000?\000:\000a\000b\000)\000{\000,\000}\000\000", "\000a\000b\000{\000,\000}\000\000", 0, 10);
+  x2("\000(\000?\000:\000a\000b\000c\000)\000+\000?\000{\0002\000}\000\000", "\000a\000b\000c\000a\000b\000c\000a\000b\000c\000\000", 0, 12);
+  x2("\000(\000?\000:\000X\000*\000)\000(\000?\000i\000:\000x\000a\000)\000\000", "\000X\000X\000X\000a\000\000", 0, 8);
+  x2("\000(\000d\000+\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
+  x2("\000(\000[\000^\000a\000b\000c\000]\000*\000)\000(\000[\000^\000a\000b\000c\000]\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
+  x2("\000(\000\134\000w\000+\000)\000(\000\134\000w\000z\000)\000\000", "\000d\000d\000d\000z\000\000", 0, 8);
+  x3("\000(\000a\000)\000\000", "\000a\000\000", 0, 2, 1);
+  x3("\000(\000a\000b\000)\000\000", "\000a\000b\000\000", 0, 4, 1);
+  x2("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4);
+  x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 1);
+  x3("\000(\000(\000a\000b\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 2);
+  x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000a\000b\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\000a\000b\000\000", 0, 4, 20);
+  x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 0, 4, 1);
+  x3("\000(\000a\000b\000)\000(\000c\000d\000)\000\000", "\000a\000b\000c\000d\000\000", 4, 8, 2);
+  x3("\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 3);
+  x3("\000(\000(\000)\000(\000a\000)\000b\000c\000(\000d\000e\000f\000)\000g\000h\000i\000j\000k\000)\000\000", "\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000\000", 6, 12, 4);
+  x2("\000(\000^\000a\000)\000\000", "\000a\000\000", 0, 2);
+  x3("\000(\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 1);
+  x3("\000(\000^\000a\000)\000|\000(\000a\000)\000\000", "\000b\000a\000\000", 2, 4, 2);
+  x3("\000(\000a\000?\000)\000\000", "\000a\000a\000a\000\000", 0, 2, 1);
+  x3("\000(\000a\000*\000)\000\000", "\000a\000a\000a\000\000", 0, 6, 1);
+  x3("\000(\000a\000*\000)\000\000", "\000\000", 0, 0, 1);
+  x3("\000(\000a\000+\000)\000\000", "\000a\000a\000a\000a\000a\000a\000a\000\000", 0, 14, 1);
+  x3("\000(\000a\000+\000|\000b\000*\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 6, 1);
+  x3("\000(\000a\000+\000|\000b\000?\000)\000\000", "\000b\000b\000b\000a\000a\000\000", 0, 2, 1);
+  x3("\000(\000a\000b\000c\000)\000?\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+  x3("\000(\000a\000b\000c\000)\000*\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+  x3("\000(\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+  x3("\000(\000x\000y\000z\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+  x3("\000(\000[\000x\000y\000z\000]\000[\000a\000b\000c\000]\000|\000a\000b\000c\000)\000+\000\000", "\000a\000b\000c\000\000", 0, 6, 1);
+  x3("\000(\000(\000?\000i\000:\000a\000b\000c\000)\000)\000\000", "\000A\000b\000C\000\000", 0, 6, 1);
+  x2("\000(\000a\000b\000c\000)\000(\000?\000i\000:\000\134\0001\000)\000\000", "\000a\000b\000c\000A\000B\000C\000\000", 0, 12);
+  x3("\000(\000(\000?\000m\000:\000a\000.\000c\000)\000)\000\000", "\000a\000\012\000c\000\000", 0, 6, 1);
+  x3("\000(\000(\000?\000=\000a\000z\000)\000a\000)\000\000", "\000a\000z\000b\000\000", 0, 2, 1);
+  x3("\000a\000b\000c\000|\000(\000.\000a\000b\000d\000)\000\000", "\000z\000a\000b\000d\000\000", 0, 8, 1);
+  x2("\000(\000?\000:\000a\000b\000c\000)\000|\000(\000A\000B\000C\000)\000\000", "\000a\000b\000c\000\000", 0, 6);
+  x3("\000(\000?\000i\000:\000(\000a\000b\000c\000)\000)\000|\000(\000z\000z\000z\000)\000\000", "\000A\000B\000C\000\000", 0, 6, 1);
+  x3("\000a\000*\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 8, 10, 1);
+  x3("\000a\000*\000?\000(\000.\000)\000\000", "\000a\000a\000a\000a\000z\000\000", 0, 2, 1);
+  x3("\000a\000*\000?\000(\000c\000)\000\000", "\000a\000a\000a\000a\000c\000\000", 8, 10, 1);
+  x3("\000[\000b\000c\000d\000]\000a\000*\000(\000.\000)\000\000", "\000c\000a\000a\000a\000a\000z\000\000", 10, 12, 1);
+  x3("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1);
+  n("\000(\000\134\000A\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000");
+  x3("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000b\000b\000c\000c\000\000", 0, 4, 1);
+  n("\000(\000^\000b\000b\000)\000c\000c\000\000", "\000z\000b\000b\000c\000c\000\000");
+  x3("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000\000", 4, 8, 1);
+  n("\000c\000c\000(\000b\000b\000$\000)\000\000", "\000c\000c\000b\000b\000b\000\000");
+  n("\000(\000\134\0001\000)\000\000", "\000\000");
+  n("\000\134\0001\000(\000a\000)\000\000", "\000a\000a\000\000");
+  n("\000(\000a\000(\000b\000)\000\134\0001\000)\000\134\0002\000+\000\000", "\000a\000b\000a\000b\000b\000\000");
+  n("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000\000");
+  x2("\000(\000?\000:\000(\000?\000:\000\134\0001\000|\000z\000)\000(\000a\000)\000)\000+\000$\000\000", "\000z\000a\000a\000a\000\000", 0, 8);
+  x2("\000(\000a\000)\000(\000?\000=\000\134\0001\000)\000\000", "\000a\000a\000\000", 0, 2);
+  n("\000(\000a\000)\000$\000|\000\134\0001\000\000", "\000a\000z\000\000");
+  x2("\000(\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
+  n("\000(\000a\000)\000\134\0001\000\000", "\000a\000b\000\000");
+  x2("\000(\000a\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
+  x2("\000(\000a\000?\000?\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 0);
+  x2("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 8);
+  x3("\000(\000a\000*\000)\000\134\0001\000\000", "\000a\000a\000a\000a\000a\000\000", 0, 4, 1);
+  x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000b\000b\000b\000\000", 0, 10);
+  x2("\000a\000(\000b\000*\000)\000\134\0001\000\000", "\000a\000b\000\000", 0, 2);
+  x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0001\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000a\000a\000a\000b\000b\000\000", 0, 20);
+  x2("\000(\000a\000*\000)\000(\000b\000*\000)\000\134\0002\000\000", "\000a\000a\000a\000b\000b\000b\000b\000\000", 0, 14);
+  x2("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 16);
+  x3("\000(\000(\000(\000(\000(\000(\000(\000a\000*\000)\000b\000)\000)\000)\000)\000)\000)\000c\000\134\0007\000\000", "\000a\000a\000a\000b\000c\000a\000a\000a\000\000", 0, 6, 7);
+  x2("\000(\000a\000)\000(\000b\000)\000(\000c\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "\000a\000b\000c\000b\000a\000c\000\000", 0, 12);
+  x2("\000(\000[\000a\000-\000d\000]\000)\000\134\0001\000\000", "\000c\000c\000\000", 0, 4);
+  x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000 \000\000", 0, 12);
+  n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "\000f\0005\000 \000f\0005\000\000");
+  x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000w\000h\000o\000w\000h\000o\000\000", 0, 12);
+  x2("\000.\000.\000.\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000a\000b\000c\000w\000h\000o\000w\000h\000o\000\000", 0, 18);
+  x2("\000(\000w\000h\000o\000|\000[\000a\000-\000c\000]\000{\0003\000}\000)\000\134\0001\000\000", "\000c\000b\000c\000c\000b\000c\000\000", 0, 12);
+  x2("\000(\000^\000a\000)\000\134\0001\000\000", "\000a\000a\000\000", 0, 4);
+  n("\000(\000^\000a\000)\000\134\0001\000\000", "\000b\000a\000a\000\000");
+  n("\000(\000a\000$\000)\000\134\0001\000\000", "\000a\000a\000\000");
+  n("\000(\000a\000b\000\134\000Z\000)\000\134\0001\000\000", "\000a\000b\000\000");
+  x2("\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000a\000\000", 2, 2);
+  x2("\000.\000(\000a\000*\000\134\000Z\000)\000\134\0001\000\000", "\000b\000a\000\000", 2, 4);
+  x3("\000(\000.\000(\000a\000b\000c\000)\000\134\0002\000)\000\000", "\000z\000a\000b\000c\000a\000b\000c\000\000", 0, 14, 1);
+  x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "\000z\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1);
+  x2("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000A\000z\000\000", 0, 8);
+  n("\000(\000(\000?\000i\000:\000a\000z\000)\000)\000\134\0001\000\000", "\000A\000z\000a\000z\000\000");
+  x2("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000a\000b\000\000", 2, 4);
+  n("\000(\000?\000<\000=\000a\000)\000b\000\000", "\000b\000b\000\000");
+  x2("\000(\000?\000<\000=\000a\000|\000b\000)\000b\000\000", "\000b\000b\000\000", 2, 4);
+  x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000b\000c\000b\000\000", 4, 6);
+  x2("\000(\000?\000<\000=\000a\000|\000b\000c\000)\000b\000\000", "\000a\000b\000\000", 2, 4);
+  x2("\000(\000?\000<\000=\000a\000|\000b\000c\000|\000|\000d\000e\000f\000g\000h\000i\000j\000|\000k\000l\000m\000n\000o\000p\000q\000|\000r\000)\000z\000\000", "\000r\000z\000\000", 2, 4);
+  x2("\000(\000a\000)\000\134\000g\000<\0001\000>\000\000", "\000a\000a\000\000", 0, 4);
+  x2("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000c\000b\000\000", 2, 4);
+  n("\000(\000?\000<\000!\000a\000)\000b\000\000", "\000a\000b\000\000");
+  x2("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000b\000\000", "\000b\000b\000b\000\000", 0, 2);
+  n("\000(\000?\000<\000!\000a\000|\000b\000c\000)\000z\000\000", "\000b\000c\000z\000\000");
+  x2("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000a\000)\000\000", "\000a\000\000", 0, 2);
+  x2("\000(\000?\000<\000n\000a\000m\000e\000_\0002\000>\000a\000b\000)\000\134\000g\000<\000n\000a\000m\000e\000_\0002\000>\000\000", "\000a\000b\000a\000b\000\000", 0, 8);
+  x2("\000(\000?\000<\000n\000a\000m\000e\000_\0003\000>\000.\000z\000v\000.\000)\000\134\000k\000<\000n\000a\000m\000e\000_\0003\000>\000\000", "\000a\000z\000v\000b\000a\000z\000v\000b\000\000", 0, 16);
+  x2("\000(\000?\000<\000=\000\134\000g\000<\000a\000b\000>\000)\000|\000-\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000a\000b\000>\000X\000y\000Z\000)\000\000", "\000X\000y\000Z\000\000", 6, 6);
+  x2("\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000n\000>\000)\000+\000\000", "\000\000", 0, 0);
+  x2("\000(\000?\000<\000n\000>\000|\000\134\000(\000\134\000g\000<\000n\000>\000\134\000)\000)\000+\000$\000\000", "\000(\000)\000(\000(\000)\000)\000\000", 0, 12);
+  x3("\000\134\000g\000<\000n\000>\000(\000?\000<\000n\000>\000.\000)\000{\0000\000}\000\000", "\000X\000\000", 0, 2, 1);
+  x2("\000\134\000g\000<\000n\000>\000(\000a\000b\000c\000|\000d\000f\000(\000?\000<\000n\000>\000.\000Y\000Z\000)\000{\0002\000,\0008\000}\000)\000{\0000\000}\000\000", "\000X\000Y\000Z\000\000", 0, 6);
+  x2("\000\134\000A\000(\000?\000<\000n\000>\000(\000a\000\134\000g\000<\000n\000>\000)\000|\000)\000\134\000z\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
+  x2("\000(\000?\000<\000n\000>\000|\000\134\000g\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000a\000|\000(\000b\000)\000\134\000g\000<\000m\000>\000)\000\000", "\000b\000b\000b\000b\000a\000b\000b\000a\000\000", 0, 16);
+  x2("\000(\000?\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\134\000w\000+\000\134\000s\000x\000)\000a\000+\000\134\000k\000<\000n\000a\000m\000e\0001\0002\0004\0000\000>\000\000", "\000 \000 \000f\000g\000 \000x\000a\000a\000a\000a\000a\000a\000a\000a\000f\000g\000 \000x\000\000", 4, 36);
+  x3("\000(\000z\000)\000(\000)\000(\000)\000(\000?\000<\000_\0009\000>\000a\000)\000\134\000g\000<\000_\0009\000>\000\000", "\000z\000a\000a\000\000", 4, 6, 1);
+  x2("\000(\000.\000)\000(\000(\000(\000?\000<\000_\000>\000a\000)\000)\000)\000\134\000k\000<\000_\000>\000\000", "\000z\000a\000a\000\000", 0, 6);
+  x2("\000(\000(\000?\000<\000n\000a\000m\000e\0001\000>\000\134\000d\000)\000|\000(\000?\000<\000n\000a\000m\000e\0002\000>\000\134\000w\000)\000)\000(\000\134\000k\000<\000n\000a\000m\000e\0001\000>\000|\000\134\000k\000<\000n\000a\000m\000e\0002\000>\000)\000\000", "\000f\000f\000\000", 0, 4);
+  x2("\000(\000?\000:\000(\000?\000<\000x\000>\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000\000", 0, 0);
+  x2("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000e\000f\000g\000\000", 6, 18);
+  n("\000(\000?\000:\000(\000?\000<\000x\000>\000a\000b\000c\000)\000|\000(\000?\000<\000x\000>\000e\000f\000g\000)\000)\000\134\000k\000<\000x\000>\000\000", "\000a\000b\000c\000e\000f\000g\000\000");
+  x2("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000a\000-\000p\000y\000u\000m\000p\000y\000u\000m\000\000", 4, 20);
+  x3("\000(\000?\000:\000(\000?\000<\000n\0001\000>\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000|\000(\000?\000<\000n\0001\000>\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000.\000)\000)\000\134\000k\000<\000n\0001\000>\000$\000\000", "\000x\000x\000x\000x\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000a\000b\000c\000d\000e\000f\000g\000h\000i\000j\000k\000l\000m\000n\000\000", 8, 36, 14);
+  x3("\000(\000?\000<\000n\000a\000m\000e\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0006\000>\000)\000(\000?\000<\000n\000a\000m\000e\0007\000>\000)\000(\000?\000<\000n\000a\000m\000e\0008\000>\000)\000(\000?\000<\000n\000a\000m\000e\0009\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0000\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0001\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0002\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0003\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0004\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0005\000>\000)\000(\000?\000<\000n\000a\000m\000e\0001\0006\000>\000a\000a\000a\000)\000(\000?\000<\000n\000a\000m\000e\0001\0007\000>\000)\000$\000\000", "\000a\000a\000a\000\000", 0, 6, 16);
+  x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000a\000\000", 0, 2);
+  x2("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000\000", 0, 26);
+  x3("\000(\000?\000<\000f\000o\000o\000>\000a\000|\000\134\000(\000\134\000g\000<\000f\000o\000o\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(\000(\000(\000a\000)\000)\000)\000)\000)\000)\000)\000)\000\000", 0, 34, 1);
+  x2("\000\134\000g\000<\000b\000a\000r\000>\000|\000\134\000z\000E\000N\000D\000(\000?\000<\000b\000a\000r\000>\000.\000*\000a\000b\000c\000$\000)\000\000", "\000a\000b\000c\000x\000x\000x\000a\000b\000c\000\000", 0, 18);
+  x2("\000\134\000g\000<\0001\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000\000", "\000b\000a\000c\000\000", 0, 6);
+  x3("\000\134\000g\000<\000_\000A\000>\000\134\000g\000<\000_\000A\000>\000|\000\134\000z\000E\000N\000D\000(\000.\000a\000.\000)\000(\000?\000<\000_\000A\000>\000.\000b\000.\000)\000\000", "\000x\000b\000x\000y\000b\000y\000\000", 6, 12, 1);
+  x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\000p\000o\000n\000>\000|\000\134\000g\000<\000p\000a\000n\000>\000|\000\134\000z\000E\000N\000D\000 \000 \000(\000?\000<\000p\000a\000n\000>\000a\000|\000c\000\134\000g\000<\000p\000o\000n\000>\000c\000)\000(\000?\000<\000p\000o\000n\000>\000b\000|\000d\000\134\000g\000<\000p\000a\000n\000>\000d\000)\000)\000$\000\000", "\000c\000d\000c\000b\000c\000d\000c\000\000", 0, 14);
+  x2("\000\134\000A\000(\000?\000<\000n\000>\000|\000a\000\134\000g\000<\000m\000>\000)\000\134\000z\000|\000\134\000z\000E\000N\000D\000 \000(\000?\000<\000m\000>\000\134\000g\000<\000n\000>\000)\000\000", "\000a\000a\000a\000a\000\000", 0, 8);
+  x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000\000", 2, 10);
+  x2("\000(\000?\000<\000n\000>\000(\000a\000|\000b\000\134\000g\000<\000n\000>\000c\000)\000{\0003\000,\0005\000}\000)\000\000", "\000b\000a\000a\000a\000a\000c\000a\000a\000a\000a\000a\000\000", 0, 20);
+  x2("\000(\000?\000<\000p\000a\000r\000e\000>\000\134\000(\000(\000[\000^\000\134\000(\000\134\000)\000]\000+\000+\000|\000\134\000g\000<\000p\000a\000r\000e\000>\000)\000*\000+\000\134\000)\000)\000\000", "\000(\000(\000a\000)\000)\000\000", 0, 10);
+  x2("\000(\000)\000*\000\134\0001\000\000", "\000\000", 0, 0);
+  x2("\000(\000?\000:\000(\000)\000|\000(\000)\000)\000*\000\134\0001\000\134\0002\000\000", "\000\000", 0, 0);
+  x3("\000(\000?\000:\000\134\0001\000a\000|\000(\000)\000)\000*\000\000", "\000a\000\000", 0, 0, 1);
+  x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000\000", "\0000\000x\0001\000x\0002\000x\0003\000\000", 2, 12);
+  x2("\000x\000(\000(\000.\000)\000*\000)\000*\000x\000(\000?\000i\000:\000\134\0001\000)\000\134\000Z\000\000", "\0000\000x\0001\000x\0002\000x\0001\000X\0002\000\000", 2, 18);
+  x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000\134\0005\000\000", "\000\000", 0, 0);
+  x2("\000(\000?\000:\000(\000)\000|\000(\000)\000|\000(\000)\000|\000(\000x\000)\000|\000(\000)\000|\000(\000)\000)\000*\000\134\0002\000b\000\134\0005\000\000", "\000b\000\000", 0, 2);
+  x2("\217\372\000\000", "\217\372\000\000", 0, 2);
+  x2("\000\000", "0B\000\000", 0, 0);
+  x2("0B\000\000", "0B\000\000", 0, 2);
+  n("0D\000\000", "0B\000\000");
+  x2("0F0F\000\000", "0F0F\000\000", 0, 4);
+  x2("0B0D0F\000\000", "0B0D0F\000\000", 0, 6);
+  x2("0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", "0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S0S\000\000", 0, 70);
+  x2("0B\000\000", "0D0B\000\000", 2, 4);
+  x2("0D0F\000\000", "0B0D0F\000\000", 2, 6);
+  x2("e\207\000\000", "e\207\000\000", 0, 2);
+  x2("\000.\000\000", "0B\000\000", 0, 2);
+  x2("\000.\000.\000\000", "0K0M\000\000", 0, 4);
+  x2("\000\134\000w\000\000", "0J\000\000", 0, 2);
+  n("\000\134\000W\000\000", "0B\000\000");
+  x2("\000[\000\134\000W\000]\000\000", "0F\000$\000\000", 2, 4);
+  x2("\000\134\000S\000\000", "0]\000\000", 0, 2);
+  x2("\000\134\000S\000\000", "o\042\000\000", 0, 2);
+  x2("\000\134\000b\000\000", "l\027\000 \000\000", 0, 0);
+  x2("\000\134\000b\000\000", "\000 0{\000\000", 2, 2);
+  x2("\000\134\000B\000\000", "0[0]\000 \000\000", 2, 2);
+  x2("\000\134\000B\000\000", "0F\000 \000\000", 4, 4);
+  x2("\000\134\000B\000\000", "\000 0D\000\000", 0, 0);
+  x2("\000[0_0a\000]\000\000", "0a\000\000", 0, 2);
+  n("\000[0j0k\000]\000\000", "0l\000\000");
+  x2("\000[0F\000-0J\000]\000\000", "0H\000\000", 0, 2);
+  n("\000[\000^0Q\000]\000\000", "0Q\000\000");
+  x2("\000[\000\134\000w\000]\000\000", "0m\000\000", 0, 2);
+  n("\000[\000\134\000d\000]\000\000", "0u\000\000");
+  x2("\000[\000\134\000D\000]\000\000", "0o\000\000", 0, 2);
+  n("\000[\000\134\000s\000]\000\000", "0O\000\000");
+  x2("\000[\000\134\000S\000]\000\000", "0x\000\000", 0, 2);
+  x2("\000[\000\134\000w\000\134\000d\000]\000\000", "0\210\000\000", 0, 2);
+  x2("\000[\000\134\000w\000\134\000d\000]\000\000", "\000 \000 \000 0\210\000\000", 6, 8);
+  n("\000\134\000w\233<\216\312\000\000", "\000 \233<\216\312\000\000");
+  x2("\233<\000\134\000W\216\312\000\000", "\233<\000 \216\312\000\000", 0, 6);
+  x2("0B\000.0D\000.0F\000\000", "0B0B0D0D0F\000\000", 0, 10);
+  x2("\000.\000\134\000w0F\000\134\000W\000.\000.0^\000\000", "0H0F0F\000 0F0^0^\000\000", 0, 14);
+  x2("\000\134\000s\000\134\000w0S0S0S\000\000", "\000 0S0S0S0S\000\000", 0, 10);
+  x2("0B0B\000.0Q\000\000", "0B0B0Q0Q\000\000", 0, 8);
+  n("\000.0D\000\000", "0D0H\000\000");
+  x2("\000.0J\000\000", "0J0J\000\000", 0, 4);
+  x2("\000^0B\000\000", "0B\000\000", 0, 2);
+  x2("\000^0\200\000$\000\000", "0\200\000\000", 0, 2);
+  x2("\000^\000\134\000w\000$\000\000", "0k\000\000", 0, 2);
+  x2("\000^\000\134\000w0K0M0O0Q0S\000$\000\000", "\000z0K0M0O0Q0S\000\000", 0, 12);
+  x2("\000^\000\134\000w\000.\000.\000.0F0H0J\000$\000\000", "\000z0B0D0F0F0H0J\000\000", 0, 14);
+  x2("\000\134\000w\000\134\000w\000\134\000s\000\134\000W0J0J0J\000\134\000d\000\000", "\000a0J\000 \000 0J0J0J\0004\000\000", 0, 16);
+  x2("\000\134\000A0_0a0d\000\000", "0_0a0d\000\000", 0, 6);
+  x2("0\2000\2010\202\000\134\000Z\000\000", "0\2000\2010\202\000\000", 0, 6);
+  x2("0K0M0O\000\134\000z\000\000", "0K0M0O\000\000", 0, 6);
+  x2("0K0M0O\000\134\000Z\000\000", "0K0M0O\000\012\000\000", 0, 6);
+  x2("\000\134\000G0}0t\000\000", "0}0t\000\000", 0, 4);
+  n("\000\134\000G0H\000\000", "0F0H0J\000\000");
+  n("0h0f\000\134\000G\000\000", "0h0f\000\000");
+  n("0~0\177\000\134\000A\000\000", "0~0\177\000\000");
+  n("0~\000\134\000A0\177\000\000", "0~0\177\000\000");
+  x2("\000(\000?\000=0[\000)0[\000\000", "0[\000\000", 0, 2);
+  n("\000(\000?\000=0F\000)\000.\000\000", "0D\000\000");
+  x2("\000(\000?\000!0F\000)0K\000\000", "0K\000\000", 0, 2);
+  n("\000(\000?\000!0h\000)0B\000\000", "0h\000\000");
+  x2("\000(\000?\000i\000:0B\000)\000\000", "0B\000\000", 0, 2);
+  x2("\000(\000?\000i\000:0v0y\000)\000\000", "0v0y\000\000", 0, 4);
+  n("\000(\000?\000i\000:0D\000)\000\000", "0F\000\000");
+  x2("\000(\000?\000m\000:0\210\000.\000)\000\000", "0\210\000\012\000\000", 0, 4);
+  x2("\000(\000?\000m\000:\000.0\201\000)\000\000", "0~\000\0120\201\000\000", 2, 6);
+  x2("0B\000?\000\000", "\000\000", 0, 0);
+  x2("Y\011\000?\000\000", "S\026\000\000", 0, 0);
+  x2("Y\011\000?\000\000", "Y\011\000\000", 0, 2);
+  x2("\221\317\000*\000\000", "\000\000", 0, 0);
+  x2("\221\317\000*\000\000", "\221\317\000\000", 0, 2);
+  x2("[P\000*\000\000", "[P[P[P\000\000", 0, 6);
+  x2("\231\254\000*\000\000", "\236\177\231\254\231\254\231\254\231\254\000\000", 0, 0);
+  n("\134q\000+\000\000", "\000\000");
+  x2("l\263\000+\000\000", "l\263\000\000", 0, 2);
+  x2("fB\000+\000\000", "fBfBfBfB\000\000", 0, 8);
+  x2("0H\000+\000\000", "0H0H0F0F0F\000\000", 0, 4);
+  x2("0F\000+\000\000", "0J0F0F0F0F\000\000", 2, 10);
+  x2("\000.\000?\000\000", "0_\000\000", 0, 2);
+  x2("\000.\000*\000\000", "0q0t0w0z\000\000", 0, 8);
+  x2("\000.\000+\000\000", "0\215\000\000", 0, 2);
+  x2("\000.\000+\000\000", "0D0F0H0K\000\012\000\000", 0, 8);
+  x2("0B\000|0D\000\000", "0B\000\000", 0, 2);
+  x2("0B\000|0D\000\000", "0D\000\000", 0, 2);
+  x2("0B0D\000|0D0F\000\000", "0B0D\000\000", 0, 4);
+  x2("0B0D\000|0D0F\000\000", "0D0F\000\000", 0, 4);
+  x2("0\222\000(\000?\000:0K0M\000|0M0O\000)\000\000", "0\2220K0M\000\000", 0, 6);
+  x2("0\222\000(\000?\000:0K0M\000|0M0O\000)0Q\000\000", "0\2220M0O0Q\000\000", 0, 8);
+  x2("0B0D\000|\000(\000?\000:0B0F\000|0B0\222\000)\000\000", "0B0\222\000\000", 0, 4);
+  x2("0B\000|0D\000|0F\000\000", "0H0F\000\000", 2, 4);
+  x2("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0W0Y0[\000\000", 0, 6);
+  n("0B\000|0D\000|0F0H\000|0J0K0M\000|0O\000|0Q0S0U\000|0W0Y0[\000|0]\000|0_0a\000|0d0f0h0j0k\000|0l0m\000\000", "0Y0[\000\000");
+  x2("0B\000|\000^0\217\000\000", "0v0B\000\000", 2, 4);
+  x2("0B\000|\000^0\222\000\000", "0\2220B\000\000", 0, 2);
+  x2("\233<\000|\000\134\000G\216\312\000\000", "0Q\216\312\233<\000\000", 4, 6);
+  x2("\233<\000|\000\134\000G\216\312\000\000", "\216\312\233<\000\000", 0, 2);
+  x2("\233<\000|\000\134\000A\216\312\000\000", "\000b\216\312\233<\000\000", 4, 6);
+  x2("\233<\000|\000\134\000A\216\312\000\000", "\216\312\000\000", 0, 2);
+  x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\233<\000\000", 2, 4);
+  x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\000", 0, 2);
+  x2("\233<\000|\216\312\000\134\000Z\000\000", "\216\312\000\012\000\000", 0, 2);
+  x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\233<\000\000", 2, 4);
+  x2("\233<\000|\216\312\000\134\000z\000\000", "\216\312\000\000", 0, 2);
+  x2("\000\134\000w\000|\000\134\000s\000\000", "0J\000\000", 0, 2);
+  x2("\000\134\000w\000|\000%\000\000", "\000%0J\000\000", 0, 2);
+  x2("\000\134\000w\000|\000[\000&\000$\000]\000\000", "0F\000&\000\000", 0, 2);
+  x2("\000[0D\000-0Q\000]\000\000", "0F\000\000", 0, 2);
+  x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0B\000\000", 0, 2);
+  x2("\000[0D\000-0Q\000]\000|\000[\000^0K\000-0S\000]\000\000", "0K\000\000", 0, 2);
+  x2("\000[\000^0B\000]\000\000", "\000\012\000\000", 0, 2);
+  x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0F0\222\000\000", 0, 2);
+  x2("\000(\000?\000:0B\000|\000[0F\000-0M\000]\000)\000|0D0\222\000\000", "0D0\222\000\000", 0, 4);
+  x2("0B0D0F\000|\000(\000?\000=0Q0Q\000)\000.\000.0{\000\000", "0Q0Q0{\000\000", 0, 6);
+  x2("0B0D0F\000|\000(\000?\000!0Q0Q\000)\000.\000.0{\000\000", "0B0D0{\000\000", 0, 6);
+  x2("\000(\000?\000=0\2220B\000)\000.\000.0B\000|\000(\000?\000=0\2220\222\000)\000.\000.0B\000\000", "0\2220\2220B\000\000", 0, 6);
+  x2("\000(\000?\000<\000=0B\000|0D0F\000)0D\000\000", "0D0F0D\000\000", 4, 6);
+  n("\000(\000?\000>0B\000|0B0D0H\000)0F\000\000", "0B0D0H0F\000\000");
+  x2("\000(\000?\000>0B0D0H\000|0B\000)0F\000\000", "0B0D0H0F\000\000", 0, 8);
+  x2("0B\000?\000|0D\000\000", "0B\000\000", 0, 2);
+  x2("0B\000?\000|0D\000\000", "0D\000\000", 0, 0);
+  x2("0B\000?\000|0D\000\000", "\000\000", 0, 0);
+  x2("0B\000*\000|0D\000\000", "0B0B\000\000", 0, 4);
+  x2("0B\000*\000|0D\000*\000\000", "0D0B\000\000", 0, 0);
+  x2("0B\000*\000|0D\000*\000\000", "0B0D\000\000", 0, 2);
+  x2("\000[\000a0B\000]\000*\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 4);
+  x2("0B\000+\000|0D\000*\000\000", "\000\000", 0, 0);
+  x2("0B\000+\000|0D\000*\000\000", "0D0D0D\000\000", 0, 6);
+  x2("0B\000+\000|0D\000*\000\000", "0B0D0D0D\000\000", 0, 2);
+  x2("0B\000+\000|0D\000*\000\000", "\000a0B0D0D0D\000\000", 0, 0);
+  n("0B\000+\000|0D\000+\000\000", "\000\000");
+  x2("\000(0B\000|0D\000)\000?\000\000", "0D\000\000", 0, 2);
+  x2("\000(0B\000|0D\000)\000*\000\000", "0D0B\000\000", 0, 4);
+  x2("\000(0B\000|0D\000)\000+\000\000", "0D0B0D\000\000", 0, 6);
+  x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 0, 8);
+  x2("\000(0B0D\000|0F0H\000)\000+\000\000", "0F0B0B0D0F0H\000\000", 4, 12);
+  x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0B0D0F0B\000\000", 2, 10);
+  x2("\000(0B0D\000|0F0B\000)\000+\000\000", "0B0D0\2220F0B\000\000", 0, 4);
+  x2("\000(0B0D\000|0F0B\000)\000+\000\000", "\000$\000$\000z\000z\000z\000z0B0D0\2220F0B\000\000", 12, 16);
+  x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0B0D0B0D0B\000\000", 0, 10);
+  x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B\000\000", 2, 4);
+  x2("\000(0B\000|0D0B0D\000)\000+\000\000", "0D0B0B0B0D0B\000\000", 2, 8);
+  x2("\000(\000?\000:0B\000|0D\000)\000(\000?\000:0B\000|0D\000)\000\000", "0B0D\000\000", 0, 4);
+  x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000*\000|0D\000*\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 6);
+  x2("\000(\000?\000:0B\000*\000|0D\000*\000)\000(\000?\000:0B\000+\000|0D\000+\000)\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
+  x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
+  x2("\000(\000?\000:0B\000+\000|0D\000+\000)\000{\0001\000,\0002\000}\000\000", "0B0B0B0D0D0D\000\000", 0, 12);
+  x2("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0F0F\000\000", 0, 4);
+  n("\000(\000?\000:0B\000+\000|\000\134\000A0D\000*\000)0F0F\000\000", "0B0D0F0F\000\000");
+  x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0B0D0F\000\000", 12, 16);
+  x2("\000(\000?\000:\000^0B\000+\000|0D\000+\000)\000*0F\000\000", "0B0B0D0D0D0D0F\000\000", 0, 14);
+  x2("0F\000{\0000\000,\000}\000\000", "0F0F0F0F\000\000", 0, 8);
+  x2("0B\000|\000(\000?\000i\000)\000c\000\000", "\000C\000\000", 0, 2);
+  x2("\000(\000?\000i\000)\000c\000|0B\000\000", "\000C\000\000", 0, 2);
+  x2("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000a\000\000", 0, 2);
+  n("\000(\000?\000i\000:0B\000)\000|\000a\000\000", "\000A\000\000");
+  x2("\000[0B0D0F\000]\000?\000\000", "0B0D0F\000\000", 0, 2);
+  x2("\000[0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 6);
+  x2("\000[\000^0B0D0F\000]\000*\000\000", "0B0D0F\000\000", 0, 0);
+  n("\000[\000^0B0D0F\000]\000+\000\000", "0B0D0F\000\000");
+  x2("0B\000?\000?\000\000", "0B0B0B\000\000", 0, 0);
+  x2("0D0B\000?\000?0D\000\000", "0D0B0D\000\000", 0, 6);
+  x2("0B\000*\000?\000\000", "0B0B0B\000\000", 0, 0);
+  x2("0D0B\000*\000?\000\000", "0D0B0B\000\000", 0, 2);
+  x2("0D0B\000*\000?0D\000\000", "0D0B0B0D\000\000", 0, 8);
+  x2("0B\000+\000?\000\000", "0B0B0B\000\000", 0, 2);
+  x2("0D0B\000+\000?\000\000", "0D0B0B\000\000", 0, 4);
+  x2("0D0B\000+\000?0D\000\000", "0D0B0B0D\000\000", 0, 8);
+  x2("\000(\000?\000:Y)\000?\000)\000?\000?\000\000", "Y)\000\000", 0, 0);
+  x2("\000(\000?\000:Y)\000?\000?\000)\000?\000\000", "Y)\000\000", 0, 0);
+  x2("\000(\000?\000:Y\042\000?\000)\000+\000?\000\000", "Y\042Y\042Y\042\000\000", 0, 2);
+  x2("\000(\000?\000:\230\250\000+\000)\000?\000?\000\000", "\230\250\230\250\230\250\000\000", 0, 0);
+  x2("\000(\000?\000:\226\352\000+\000)\000?\000?\227\034\000\000", "\226\352\226\352\226\352\227\034\000\000", 0, 8);
+  x2("\000(\000?\000:0B0D\000)\000?\000{\0002\000}\000\000", "\000\000", 0, 0);
+  x2("\000(\000?\000:\233<\216\312\000)\000?\000{\0002\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 8);
+  x2("\000(\000?\000:\233<\216\312\000)\000*\000{\0000\000}\000\000", "\233<\216\312\233<\216\312\233<\000\000", 0, 0);
+  x2("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16);
+  n("\000(\000?\000:\233<\216\312\000)\000{\0003\000,\000}\000\000", "\233<\216\312\233<\216\312\000\000");
+  x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 12);
+  x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 16);
+  x2("\000(\000?\000:\233<\216\312\000)\000{\0002\000,\0004\000}\000?\000\000", "\233<\216\312\233<\216\312\233<\216\312\233<\216\312\233<\216\312\000\000", 0, 8);
+  x2("\000(\000?\000:\233<\216\312\000)\000{\000,\000}\000\000", "\233<\216\312\000{\000,\000}\000\000", 0, 10);
+  x2("\000(\000?\000:0K0M0O\000)\000+\000?\000{\0002\000}\000\000", "0K0M0O0K0M0O0K0M0O\000\000", 0, 12);
+  x3("\000(pk\000)\000\000", "pk\000\000", 0, 2, 1);
+  x3("\000(pkl4\000)\000\000", "pkl4\000\000", 0, 4, 1);
+  x2("\000(\000(fB\225\223\000)\000)\000\000", "fB\225\223\000\000", 0, 4);
+  x3("\000(\000(\230\250l4\000)\000)\000\000", "\230\250l4\000\000", 0, 4, 1);
+  x3("\000(\000(f(e\345\000)\000)\000\000", "f(e\345\000\000", 0, 4, 2);
+  x3("\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\000(\221\317[P\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000)\000\000", "\221\317[P\000\000", 0, 4, 20);
+  x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 0, 4, 1);
+  x3("\000(0B0D\000)\000(0F0H\000)\000\000", "0B0D0F0H\000\000", 4, 8, 2);
+  x3("\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 3);
+  x3("\000(\000(\000)\000(0B\000)0D0F\000(0H0J0K\000)0M0O0Q0S\000)\000\000", "0B0D0F0H0J0K0M0O0Q0S\000\000", 6, 12, 4);
+  x3("\000.\000*\000(0\3250\251\000)0\3630\3730\336\000(0\363\000(\000)0\2670\3450\277\000)0\2440\363\000\000", "0\3250\2510\3630\3730\3360\3630\2670\3450\2770\2440\363\000\000", 10, 18, 2);
+  x2("\000(\000^0B\000)\000\000", "0B\000\000", 0, 2);
+  x3("\000(0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 1);
+  x3("\000(\000^0B\000)\000|\000(0B\000)\000\000", "0D0B\000\000", 2, 4, 2);
+  x3("\000(0B\000?\000)\000\000", "0B0B0B\000\000", 0, 2, 1);
+  x3("\000(0~\000*\000)\000\000", "0~0~0~\000\000", 0, 6, 1);
+  x3("\000(0h\000*\000)\000\000", "\000\000", 0, 0, 1);
+  x3("\000(0\213\000+\000)\000\000", "0\2130\2130\2130\2130\2130\2130\213\000\000", 0, 14, 1);
+  x3("\000(0u\000+\000|0x\000*\000)\000\000", "0u0u0u0x0x\000\000", 0, 6, 1);
+  x3("\000(0B\000+\000|0D\000?\000)\000\000", "0D0D0D0B0B\000\000", 0, 2, 1);
+  x3("\000(0B0D0F\000)\000?\000\000", "0B0D0F\000\000", 0, 6, 1);
+  x3("\000(0B0D0F\000)\000*\000\000", "0B0D0F\000\000", 0, 6, 1);
+  x3("\000(0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1);
+  x3("\000(0U0W0Y\000|0B0D0F\000)\000+\000\000", "0B0D0F\000\000", 0, 6, 1);
+  x3("\000(\000[0j0k0l\000]\000[0K0M0O\000]\000|0K0M0O\000)\000+\000\000", "0K0M0O\000\000", 0, 6, 1);
+  x3("\000(\000(\000?\000i\000:0B0D0F\000)\000)\000\000", "0B0D0F\000\000", 0, 6, 1);
+  x3("\000(\000(\000?\000m\000:0B\000.0F\000)\000)\000\000", "0B\000\0120F\000\000", 0, 6, 1);
+  x3("\000(\000(\000?\000=0B0\223\000)0B\000)\000\000", "0B0\2230D\000\000", 0, 2, 1);
+  x3("0B0D0F\000|\000(\000.0B0D0H\000)\000\000", "0\2230B0D0H\000\000", 0, 8, 1);
+  x3("0B\000*\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1);
+  x3("0B\000*\000?\000(\000.\000)\000\000", "0B0B0B0B0\223\000\000", 0, 2, 1);
+  x3("0B\000*\000?\000(0\223\000)\000\000", "0B0B0B0B0\223\000\000", 8, 10, 1);
+  x3("\000[0D0F0H\000]0B\000*\000(\000.\000)\000\000", "0H0B0B0B0B0\223\000\000", 10, 12, 1);
+  x3("\000(\000\134\000A0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1);
+  n("\000(\000\134\000A0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000");
+  x3("\000(\000^0D0D\000)0F0F\000\000", "0D0D0F0F\000\000", 0, 4, 1);
+  n("\000(\000^0D0D\000)0F0F\000\000", "0\2230D0D0F0F\000\000");
+  x3("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\213\000\000", 4, 8, 1);
+  n("0\2150\215\000(0\2130\213\000$\000)\000\000", "0\2150\2150\2130\2130\213\000\000");
+  x2("\000(q!\000)\000\134\0001\000\000", "q!q!\000\000", 0, 4);
+  n("\000(q!\000)\000\134\0001\000\000", "q!kf\000\000");
+  x2("\000(zz\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 4);
+  x2("\000(zz\000?\000?\000)\000\134\0001\000\000", "zzzz\000\000", 0, 0);
+  x2("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 8);
+  x3("\000(zz\000*\000)\000\134\0001\000\000", "zzzzzzzzzz\000\000", 0, 4, 1);
+  x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D0D0D0D\000\000", 0, 10);
+  x2("0B\000(0D\000*\000)\000\134\0001\000\000", "0B0D\000\000", 0, 2);
+  x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0001\000\134\0002\000\000", "0B0B0B0D0D0B0B0B0D0D\000\000", 0, 20);
+  x2("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 0, 14);
+  x3("\000(0B\000*\000)\000(0D\000*\000)\000\134\0002\000\000", "0B0B0B0D0D0D0D\000\000", 6, 10, 2);
+  x2("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 16);
+  x3("\000(\000(\000(\000(\000(\000(\000(0}\000*\000)0z\000)\000)\000)\000)\000)\000)0t\000\134\0007\000\000", "0}0}0}0z0t0}0}0}\000\000", 0, 6, 7);
+  x2("\000(0o\000)\000(0r\000)\000(0u\000)\000\134\0002\000\134\0001\000\134\0003\000\000", "0o0r0u0r0o0u\000\000", 0, 12);
+  x2("\000(\000[0M\000-0Q\000]\000)\000\134\0001\000\000", "0O0O\000\000", 0, 4);
+  x2("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000 \000\000", 0, 12);
+  n("\000(\000\134\000w\000\134\000d\000\134\000s\000)\000\134\0001\000\000", "0B\0005\000 0B\0005\000\000");
+  x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "\212\260\377\037\212\260\377\037\000\000", 0, 8);
+  x2("\000.\000.\000.\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0B\000a0B\212\260\377\037\212\260\377\037\000\000", 0, 14);
+  x2("\000(\212\260\377\037\000|\000[0B\000-0F\000]\000{\0003\000}\000)\000\134\0001\000\000", "0F0D0F0F0D0F\000\000", 0, 12);
+  x2("\000(\000^0S\000)\000\134\0001\000\000", "0S0S\000\000", 0, 4);
+  n("\000(\000^0\200\000)\000\134\0001\000\000", "0\2010\2000\200\000\000");
+  n("\000(0B\000$\000)\000\134\0001\000\000", "0B0B\000\000");
+  n("\000(0B0D\000\134\000Z\000)\000\134\0001\000\000", "0B0D\000\000");
+  x2("\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0B\000\000", 2, 2);
+  x2("\000.\000(0B\000*\000\134\000Z\000)\000\134\0001\000\000", "0D0B\000\000", 2, 4);
+  x3("\000(\000.\000(0\2040D0\206\000)\000\134\0002\000)\000\000", "\000z0\2040D0\2060\2040D0\206\000\000", 0, 14, 1);
+  x3("\000(\000.\000(\000.\000.\000\134\000d\000.\000)\000\134\0002\000)\000\000", "0B\0001\0002\0003\0004\0001\0002\0003\0004\000\000", 0, 18, 1);
+  x2("\000(\000(\000?\000i\000:0B\000v0Z\000)\000)\000\134\0001\000\000", "0B\000v0Z0B\000v0Z\000\000", 0, 12);
+  x2("\000(\000?\000<a\0320K\000>Y\011\000|\000\134\000(\000\134\000g\000<a\0320K\000>\000\134\000)\000)\000\000", "\000(\000(\000(\000(\000(\000(Y\011\000)\000)\000)\000)\000)\000)\000\000", 0, 26);
+  x2("\000\134\000A\000(\000?\000:\000\134\000g\000<\226?\000_\0001\000>\000|\000\134\000g\000<N\221\000_\0002\000>\000|\000\134\000z}BN\206\000 \000 \000(\000?\000<\226?\000_\0001\000>\211\263\000|\201\352\000\134\000g\000<N\221\000_\0002\000>\201\352\000)\000(\000?\000<N\221\000_\0002\000>W(\000|\203\351\205\251\000\134\000g\000<\226?\000_\0001\000>\203\351\205\251\000)\000)\000$\000\000", "\203\351\205\251\201\352\203\351\205\251\201\352W(\201\352\203\351\205\251\201\352\203\351\205\251\000\000", 0, 26);
+  x2("\000[\000[0r0u\000]\000]\000\000", "0u\000\000", 0, 2);
+  x2("\000[\000[0D0J0F\000]0K\000]\000\000", "0K\000\000", 0, 2);
+  n("\000[\000[\000^0B\000]\000]\000\000", "0B\000\000");
+  n("\000[\000^\000[0B\000]\000]\000\000", "0B\000\000");
+  x2("\000[\000^\000[\000^0B\000]\000]\000\000", "0B\000\000", 0, 2);
+  x2("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0O\000\000", 0, 2);
+  n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0K\000\000");
+  n("\000[\000[0K0M0O\000]\000&\000&0M0O\000]\000\000", "0Q\000\000");
+  x2("\000[0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000", 0, 2);
+  n("\000[\000^0B\000-0\223\000&\000&0D\000-0\222\000&\000&0F\000-0\221\000]\000\000", "0\221\000\000");
+  x2("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0D\000\000", 0, 2);
+  n("\000[\000[\000^0B\000&\000&0B\000]\000&\000&0B\000-0\223\000]\000\000", "0B\000\000");
+  x2("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0M\000\000", 0, 2);
+  n("\000[\000[\000^0B\000-0\223\000&\000&0D0F0H0J\000]\000&\000&\000[\000^0F\000-0K\000]\000]\000\000", "0D\000\000");
+  x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0F\000\000", 0, 2);
+  x2("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0H\000\000", 0, 2);
+  n("\000[\000^\000[\000^0B0D0F\000]\000&\000&\000[\000^0F0H0J\000]\000]\000\000", "0K\000\000");
+  x2("\000[0B\000-\000&\000&\000-0B\000]\000\000", "\000-\000\000", 0, 2);
+  x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000q\000-\000w\000]\000\000", "0H\000\000", 0, 2);
+  x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000f\000\000", 0, 2);
+  x2("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\000g\000\000", 0, 2);
+  n("\000[\000^\000[\000^\000a\000-\000z0B0D0F\000]\000&\000&\000[\000^\000b\000c\000d\000e\000f\000g0F0H0J\000]\000g\000-\000w\000]\000\000", "\0002\000\000");
+  x2("\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40);
+  x2("\000.\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000\134\000/\000b\000>\000\000", "\000a\000<\000b\000>0\3200\3740\2700\3470\3630n0\3000\2460\3630\3550\3740\311\000<\000/\000b\000>\000\000", 0, 40);
+  fprintf(stdout,
+       "\nRESULT   SUCC: %d,  FAIL: %d,  ERROR: %d      (by Oniguruma %s)\n",
+       nsucc, nfail, nerror, onig_version());
+
+#ifndef POSIX_TEST
+  onig_region_free(region, 1);
+  onig_end();
+#endif
+
+  return ((nfail == 0 && nerror == 0) ? 0 : -1);
+}
diff --git a/ext/mbstring/oniguruma/win32/Makefile b/ext/mbstring/oniguruma/win32/Makefile
new file mode 100644 (file)
index 0000000..d08722b
--- /dev/null
@@ -0,0 +1,200 @@
+# Oniguruma Makefile for Win32
+
+# ---- Tool, flag and naming macros ------------------------------------
+
+product_name = oniguruma
+
+# Flags handed to the .c.obj inference rule.
+CPPFLAGS = 
+CFLAGS = -O2 -nologo /W3
+LDFLAGS =
+LOADLIBES =
+# ARLIB builds the static library ($(libname)); ARDLL builds the DLL
+# ($(dllname)).
+ARLIB = lib
+ARLIB_FLAGS = -nologo
+ARDLL = cl
+# LINKFLAGS is referenced here before it is assigned on the next line.
+# NOTE(review): this relies on the make tool expanding macros at use
+# time rather than at definition time -- confirm for the tool in use.
+ARDLL_FLAGS = -nologo -LD $(LINKFLAGS) -dll
+LINKFLAGS = -link -incremental:no -pdb:none
+
+INSTALL = install -c
+CP      = copy
+CC = cl
+# Preprocessor defines added to every compile done by the .c.obj rule.
+DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT
+# Destination directory used by the "19" target.
+RUBYDIR = ..
+
+subdirs = 
+
+# Output names: static library, DLL, and the library named by $(dlllib)
+# that the POSIX test and the samples link against.
+libbase   = onig
+libname   = $(libbase)_s.lib
+dllname   = $(libbase).dll
+dlllib    = $(libbase).lib
+
+# Header groups, used in the dependency lists and in $(distfiles).
+onigheaders  = oniguruma.h regint.h regparse.h regenc.h st.h
+posixheaders = onigposix.h
+headers      = $(posixheaders) $(onigheaders)
+
+# ---- Object and source lists -------------------------------------------
+
+# Core objects, POSIX wrapper objects, and their union.
+onigobjs     = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \
+              regexec.obj regenc.obj regsyntax.obj regtrav.obj \
+              regversion.obj st.obj
+posixobjs    = regposix.obj regposerr.obj
+libobjs      = $(onigobjs) $(posixobjs)
+
+# Encoding objects.  $(encdir) is assigned further down in this file.
+# NOTE(review): relies on macros being expanded at use time -- confirm.
+jp_objs      =  $(encdir)\euc_jp.obj $(encdir)\sjis.obj
+iso8859_objs =  $(encdir)\iso8859_1.obj  $(encdir)\iso8859_2.obj \
+               $(encdir)\iso8859_3.obj  $(encdir)\iso8859_4.obj \
+               $(encdir)\iso8859_5.obj  $(encdir)\iso8859_6.obj \
+               $(encdir)\iso8859_7.obj  $(encdir)\iso8859_8.obj \
+               $(encdir)\iso8859_9.obj  $(encdir)\iso8859_10.obj \
+               $(encdir)\iso8859_11.obj $(encdir)\iso8859_13.obj \
+               $(encdir)\iso8859_14.obj $(encdir)\iso8859_15.obj \
+               $(encdir)\iso8859_16.obj
+
+# Every encoding object that goes into the libraries.  koi8.obj is
+# commented out at the end of the list and therefore not linked.
+encobjs      =  $(encdir)\ascii.obj $(encdir)\utf8.obj \
+               $(encdir)\unicode.obj \
+               $(encdir)\utf16_be.obj $(encdir)\utf16_le.obj \
+               $(encdir)\utf32_be.obj $(encdir)\utf32_le.obj \
+               $(jp_objs) $(iso8859_objs) \
+               $(encdir)\euc_tw.obj $(encdir)\euc_kr.obj $(encdir)\big5.obj \
+               $(encdir)\gb18030.obj \
+               $(encdir)\koi8_r.obj  \
+               $(encdir)\cp1251.obj # $(encdir)\koi8.obj
+
+# Source file groups (used by $(distfiles) below).
+onigsources  = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \
+              regsyntax.c regtrav.c regversion.c reggnu.c st.c
+posixsources = regposix.c regposerr.c
+libsources   = $(posixsources) $(onigsources)
+rubysources  = $(onigsources)
+
+# Directory holding the per-encoding sources and objects.
+encdir       = enc
+patchfiles   = re.c.168.patch re.c.181.patch
+distfiles    = README COPYING HISTORY \
+               Makefile.in configure.in config.h.in configure \
+               $(headers) $(libsources) $(patchfiles) \
+               test.rb testconv.rb
+# Base names of the C library test and the POSIX API test programs.
+testc        = testc
+testp        = testp
+
+makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)'
+
+# Clear the suffix list, then register the suffixes this file uses.
+.SUFFIXES:
+.SUFFIXES: .obj .c .h .ps .dvi .info .texinfo
+
+# Compile one .c file into the .obj named by the target.
+.c.obj:
+       $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $<
+
+# targets
+default: all
+
+# Copy the Win32 config.h and test program from win32\ into the current
+# directory.
+setup:
+       $(CP) win32\config.h config.h
+       $(CP) win32\testc.c  testc.c
+
+
+# Build both the static library and the DLL.
+all: $(libname) $(dllname) 
+
+# Static library: archive all core, POSIX and encoding objects.
+$(libname): $(libobjs) $(encobjs)
+       $(ARLIB) $(ARLIB_FLAGS) -out:$@ $(libobjs) $(encobjs)
+
+# DLL: link the same object set with $(ARDLL).
+$(dllname): $(libobjs) $(encobjs)
+       $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS)
+
+# Header dependencies of the core and POSIX objects.  These lines carry
+# no commands; the objects are built by the .c.obj inference rule.
+regparse.obj:  regparse.c $(onigheaders) config.h st.h
+regext.obj:    regext.c   $(onigheaders) config.h
+regtrav.obj:   regtrav.c  $(onigheaders) config.h
+regcomp.obj:   regcomp.c  $(onigheaders) config.h
+regexec.obj:   regexec.c  regint.h regenc.h oniguruma.h config.h
+reggnu.obj:    reggnu.c   regint.h regenc.h oniguruma.h config.h oniggnu.h
+regerror.obj:  regerror.c regint.h regenc.h oniguruma.h config.h
+regenc.obj:    regenc.c   regenc.h oniguruma.h config.h
+regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h
+regversion.obj: regversion.c oniguruma.h config.h
+regposix.obj:  regposix.c $(posixheaders) oniguruma.h config.h
+regposerr.obj: regposerr.c $(posixheaders) config.h
+st.obj:        st.c regint.h oniguruma.h config.h st.h
+
+# Header dependencies of the encoding objects: each depends on its own
+# source plus regenc.h and config.h.  koi8.obj is listed here although
+# it is commented out of $(encobjs).
+$(encdir)\ascii.obj:      $(encdir)\ascii.c regenc.h config.h
+$(encdir)\unicode.obj:    $(encdir)\unicode.c regenc.h config.h
+$(encdir)\utf8.obj:       $(encdir)\utf8.c regenc.h config.h
+$(encdir)\utf16_be.obj:   $(encdir)\utf16_be.c regenc.h config.h
+$(encdir)\utf16_le.obj:   $(encdir)\utf16_le.c regenc.h config.h
+$(encdir)\utf32_be.obj:   $(encdir)\utf32_be.c regenc.h config.h
+$(encdir)\utf32_le.obj:   $(encdir)\utf32_le.c regenc.h config.h
+$(encdir)\euc_jp.obj:     $(encdir)\euc_jp.c regenc.h config.h
+$(encdir)\euc_tw.obj:     $(encdir)\euc_tw.c regenc.h config.h
+$(encdir)\euc_kr.obj:     $(encdir)\euc_kr.c regenc.h config.h
+$(encdir)\sjis.obj:       $(encdir)\sjis.c regenc.h config.h
+$(encdir)\iso8859_1.obj:  $(encdir)\iso8859_1.c regenc.h config.h
+$(encdir)\iso8859_2.obj:  $(encdir)\iso8859_2.c regenc.h config.h
+$(encdir)\iso8859_3.obj:  $(encdir)\iso8859_3.c regenc.h config.h
+$(encdir)\iso8859_4.obj:  $(encdir)\iso8859_4.c regenc.h config.h
+$(encdir)\iso8859_5.obj:  $(encdir)\iso8859_5.c regenc.h config.h
+$(encdir)\iso8859_6.obj:  $(encdir)\iso8859_6.c regenc.h config.h
+$(encdir)\iso8859_7.obj:  $(encdir)\iso8859_7.c regenc.h config.h
+$(encdir)\iso8859_8.obj:  $(encdir)\iso8859_8.c regenc.h config.h
+$(encdir)\iso8859_9.obj:  $(encdir)\iso8859_9.c regenc.h config.h
+$(encdir)\iso8859_10.obj: $(encdir)\iso8859_10.c regenc.h config.h
+$(encdir)\iso8859_11.obj: $(encdir)\iso8859_11.c regenc.h config.h
+$(encdir)\iso8859_13.obj: $(encdir)\iso8859_13.c regenc.h config.h
+$(encdir)\iso8859_14.obj: $(encdir)\iso8859_14.c regenc.h config.h
+$(encdir)\iso8859_15.obj: $(encdir)\iso8859_15.c regenc.h config.h
+$(encdir)\iso8859_16.obj: $(encdir)\iso8859_16.c regenc.h config.h
+$(encdir)\koi8.obj:       $(encdir)\koi8.c   regenc.h config.h
+$(encdir)\koi8_r.obj:     $(encdir)\koi8_r.c regenc.h config.h
+$(encdir)\cp1251.obj:     $(encdir)\cp1251.c regenc.h config.h
+$(encdir)\big5.obj:       $(encdir)\big5.c   regenc.h config.h
+$(encdir)\gb18030.obj:    $(encdir)\gb18030.c   regenc.h config.h
+
+
+# Ruby test
+rtest:
+       $(RUBYDIR)\win32\ruby -w -Ke test.rb
+
+# C library test
+ctest: $(testc)
+       .\$(testc)
+
+# POSIX C library test
+ptest: $(testp)
+       .\$(testp)
+
+# Native-API test program, linked against the static library.
+$(testc): $(testc).c $(libname)
+       $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
+
+# POSIX-API test program: the same $(testc).c source compiled with
+# -DPOSIX_TEST and linked against $(dlllib).
+$(testp): $(testc).c $(dlllib)
+       $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib)
+
+# The rule that would regenerate $(testc)u.c is disabled below, so that
+# source file must already exist for the $(testc)u target to build.
+#$(testc)u.c: test.rb testconvu.rb
+#      ruby -Ke testconvu.rb test.rb > $@
+
+$(testc)u: $(testc)u.c $(libname)
+       $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
+
+# Remove objects, libraries and the testc/testp executables.  The
+# $(testc)u executable is not in the list.
+clean:
+       del *.obj $(encdir)\*.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj
+
+
+# backup file suffix
+SORIG = ruby_orig
+
+# ruby 1.9 source update
+# Copies the listed regex sources, headers and encoding files into
+# $(RUBYDIR).
+19:
+       $(CP) regerror.c    $(RUBYDIR)
+       $(CP) regparse.c    $(RUBYDIR)
+       $(CP) regcomp.c     $(RUBYDIR)
+       $(CP) regexec.c     $(RUBYDIR)
+       $(CP) regenc.c      $(RUBYDIR)
+       $(CP) regint.h      $(RUBYDIR)
+       $(CP) regparse.h    $(RUBYDIR)
+       $(CP) regenc.h      $(RUBYDIR)
+       $(CP) oniguruma.h   $(RUBYDIR)
+       $(CP) enc\ascii.c   $(RUBYDIR)
+       $(CP) enc\utf8.c    $(RUBYDIR)
+       $(CP) enc\euc_jp.c  $(RUBYDIR)
+       $(CP) enc\sjis.c    $(RUBYDIR)
+       $(CP) enc\unicode.c $(RUBYDIR)
+
+
+# Build the sample programs from sample\ against $(dlllib).
+samples: all
+       $(CC) $(CFLAGS) -I. -o simple  sample\simple.c  $(dlllib)
+       $(CC) $(CFLAGS) -I. -o posix   sample\posix.c   $(dlllib)
+       $(CC) $(CFLAGS) -I. -o names   sample\names.c   $(dlllib)
+       $(CC) $(CFLAGS) -I. -o listcap sample\listcap.c $(dlllib)
+       $(CC) $(CFLAGS) -I. -o sql     sample\sql.c     $(dlllib)
+       $(CC) $(CFLAGS) -I. -o encode  sample\encode.c  $(dlllib)
+       $(CC) $(CFLAGS) -I. -o syntax  sample\syntax.c  $(dlllib)
diff --git a/ext/mbstring/oniguruma/win32/testc.c b/ext/mbstring/oniguruma/win32/testc.c
new file mode 100644 (file)
index 0000000..acc1318
--- /dev/null
@@ -0,0 +1,863 @@
+/*
+ * This program was generated by testconv.rb.
+ */
+#include "config.h"
+#ifdef ONIG_ESCAPE_UCHAR_COLLISION
+#undef ONIG_ESCAPE_UCHAR_COLLISION
+#endif
+#include <stdio.h>
+
+#ifdef POSIX_TEST
+#include "onigposix.h"
+#else
+#include "oniguruma.h"
+#endif
+
+#ifdef HAVE_STRING_H
+# include <string.h>
+#else
+# include <strings.h>
+#endif
+
+/* Length of a NUL-terminated pattern or subject string. */
+#define SLEN(s)  strlen(s)
+
+/* Running totals, updated by xx() and printed by main(). */
+static int nsucc  = 0;   /* tests whose outcome matched the expectation */
+static int nfail  = 0;   /* tests whose outcome differed from the expectation */
+static int nerror = 0;   /* tests where compiling or searching reported an error */
+
+/* Stream that receives "ERROR: ..." lines; main() points it at stdout. */
+static FILE* err_file;
+
+#ifndef POSIX_TEST
+/* Match region shared by every search; allocated in main(). */
+static OnigRegion* region;
+#endif
+
+/*
+ * Run one test case: compile `pattern`, search `str` with it, compare the
+ * outcome with the expectation and update nsucc / nfail / nerror.
+ *
+ *   pattern  regular expression source (NUL-terminated)
+ *   str      subject string (NUL-terminated)
+ *   from,to  expected start / end offsets of match entry `mem`
+ *   mem      index of the match entry to compare (x2() and n() pass 0)
+ *   not      non-zero when the pattern is expected NOT to match
+ *
+ * An "OK" / "FAIL" line is written to stdout for every completed test.
+ * Compile and search errors are written to err_file and counted in nerror.
+ * The compiled pattern is released on every path that follows a
+ * successful compilation, including the search-error path.
+ */
+static void xx(char* pattern, char* str, int from, int to, int mem, int not)
+{
+  int r;
+
+#ifdef POSIX_TEST
+  regex_t reg;
+  char buf[200];
+  regmatch_t pmatch[25];
+
+  r = regcomp(&reg, pattern, REG_EXTENDED | REG_NEWLINE);
+  if (r) {
+    /* Compilation failed, so there is no compiled pattern to release. */
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    return ;
+  }
+
+  r = regexec(&reg, str, reg.re_nsub + 1, pmatch, 0);
+  if (r != 0 && r != REG_NOMATCH) {
+    regerror(r, &reg, buf, sizeof(buf));
+    fprintf(err_file, "ERROR: %s\n", buf);
+    nerror++;
+    regfree(&reg);  /* the pattern did compile: do not leak it */
+    return ;
+  }
+
+  if (r == REG_NOMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+    else {
+      if (pmatch[mem].rm_so == from && pmatch[mem].rm_eo == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
+               from, to, pmatch[mem].rm_so, pmatch[mem].rm_eo);
+        nfail++;
+      }
+    }
+  }
+  regfree(&reg);
+
+#else
+  regex_t* reg;
+  OnigErrorInfo einfo;
+
+  r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
+              ONIG_OPTION_DEFAULT, ONIG_ENCODING_SJIS, ONIG_SYNTAX_DEFAULT, &einfo);
+  if (r) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r, &einfo);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    return ;
+  }
+
+  r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
+                 (UChar* )str, (UChar* )(str + SLEN(str)),
+                 region, ONIG_OPTION_NONE);
+  if (r < ONIG_MISMATCH) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+    onig_error_code_to_str((UChar* )s, r);
+    fprintf(err_file, "ERROR: %s\n", s);
+    nerror++;
+    onig_free(reg);  /* the pattern did compile: do not leak it */
+    return ;
+  }
+
+  if (r == ONIG_MISMATCH) {
+    if (not) {
+      fprintf(stdout, "OK(N): /%s/ '%s'\n", pattern, str);
+      nsucc++;
+    }
+    else {
+      fprintf(stdout, "FAIL: /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+  }
+  else {
+    if (not) {
+      fprintf(stdout, "FAIL(N): /%s/ '%s'\n", pattern, str);
+      nfail++;
+    }
+    else {
+      if (region->beg[mem] == from && region->end[mem] == to) {
+        fprintf(stdout, "OK: /%s/ '%s'\n", pattern, str);
+        nsucc++;
+      }
+      else {
+        fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
+               from, to, region->beg[mem], region->end[mem]);
+        nfail++;
+      }
+    }
+  }
+  onig_free(reg);
+#endif
+}
+
+/* Expect a match whose entry 0 spans from..to. */
+static void x2(char* pattern, char* str, int from, int to)
+{
+  const int entry   = 0;  /* compare match entry 0 */
+  const int negated = 0;  /* a match is expected */
+
+  xx(pattern, str, from, to, entry, negated);
+}
+
+/* Expect a match whose entry `mem` spans from..to. */
+static void x3(char* pattern, char* str, int from, int to, int mem)
+{
+  const int negated = 0;  /* a match is expected */
+
+  xx(pattern, str, from, to, mem, negated);
+}
+
+/* Expect that `pattern` does NOT match `str`. */
+static void n(char* pattern, char* str)
+{
+  const int unused  = 0;  /* passed for from, to and mem */
+  const int negated = 1;  /* a mismatch is the expected outcome */
+
+  xx(pattern, str, unused, unused, unused, negated);
+}
+
+extern int main(int argc, char* argv[])
+{
+  err_file = stdout;
+
+#ifdef POSIX_TEST
+  reg_set_encoding(REG_POSIX_ENCODING_SJIS);
+#else
+  region = onig_region_new();
+#endif
+
+  x2("", "", 0, 0);
+  x2("^", "", 0, 0);
+  x2("$", "", 0, 0);
+  x2("\\G", "", 0, 0);
+  x2("\\A", "", 0, 0);
+  x2("\\Z", "", 0, 0);
+  x2("\\z", "", 0, 0);
+  x2("^$", "", 0, 0);
+  x2("\\ca", "\001", 0, 1);
+  x2("\\C-b", "\002", 0, 1);
+  x2("\\c\\\\", "\034", 0, 1);
+  x2("q[\\c\\\\]", "q\034", 0, 2);
+  x2("", "a", 0, 0);
+  x2("a", "a", 0, 1);
+  x2("\\x61", "a", 0, 1);
+  x2("aa", "aa", 0, 2);
+  x2("aaa", "aaa", 0, 3);
+  x2("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 35);
+  x2("ab", "ab", 0, 2);
+  x2("b", "ab", 1, 2);
+  x2("bc", "abc", 1, 3);
+  x2("(?i:#RET#)", "#INS##RET#", 5, 10);
+  x2("\\17", "\017", 0, 1);
+  x2("\\x1f", "\x1f", 0, 1);
+  x2("a(?#....\\\\JJJJ)b", "ab", 0, 2);
+  x2("(?x)  G (o O(?-x)oO) g L", "GoOoOgLe", 0, 7);
+  x2(".", "a", 0, 1);
+  n(".", "");
+  x2("..", "ab", 0, 2);
+  x2("\\w", "e", 0, 1);
+  n("\\W", "e");
+  x2("\\s", " ", 0, 1);
+  x2("\\S", "b", 0, 1);
+  x2("\\d", "4", 0, 1);
+  n("\\D", "4");
+  x2("\\b", "z ", 0, 0);
+  x2("\\b", " z", 1, 1);
+  x2("\\B", "zz ", 1, 1);
+  x2("\\B", "z ", 2, 2);
+  x2("\\B", " z", 0, 0);
+  x2("[ab]", "b", 0, 1);
+  n("[ab]", "c");
+  x2("[a-z]", "t", 0, 1);
+  n("[^a]", "a");
+  x2("[^a]", "\n", 0, 1);
+  x2("[]]", "]", 0, 1);
+  n("[^]]", "]");
+  x2("[\\^]+", "0^^1", 1, 3);
+  x2("[b-]", "b", 0, 1);
+  x2("[b-]", "-", 0, 1);
+  x2("[\\w]", "z", 0, 1);
+  n("[\\w]", " ");
+  x2("[\\W]", "b$", 1, 2);
+  x2("[\\d]", "5", 0, 1);
+  n("[\\d]", "e");
+  x2("[\\D]", "t", 0, 1);
+  n("[\\D]", "3");
+  x2("[\\s]", " ", 0, 1);
+  n("[\\s]", "a");
+  x2("[\\S]", "b", 0, 1);
+  n("[\\S]", " ");
+  x2("[\\w\\d]", "2", 0, 1);
+  n("[\\w\\d]", " ");
+  x2("[[:upper:]]", "B", 0, 1);
+  x2("[*[:xdigit:]+]", "+", 0, 1);
+  x2("[*[:xdigit:]+]", "GHIKK-9+*", 6, 7);
+  x2("[*[:xdigit:]+]", "-@^+", 3, 4);
+  n("[[:upper]]", "A");
+  x2("[[:upper]]", ":", 0, 1);
+  x2("[\\044-\\047]", "\046", 0, 1);
+  x2("[\\x5a-\\x5c]", "\x5b", 0, 1);
+  x2("[\\x6A-\\x6D]", "\x6c", 0, 1);
+  n("[\\x6A-\\x6D]", "\x6E");
+  n("^[0-9A-F]+ 0+ UNDEF ", "75F 00000000 SECT14A notype ()    External    | _rb_apply");
+  x2("[\\[]", "[", 0, 1);
+  x2("[\\]]", "]", 0, 1);
+  x2("[&]", "&", 0, 1);
+  x2("[[ab]]", "b", 0, 1);
+  x2("[[ab]c]", "c", 0, 1);
+  n("[[^a]]", "a");
+  n("[^[a]]", "a");
+  x2("[[ab]&&bc]", "b", 0, 1);
+  n("[[ab]&&bc]", "a");
+  n("[[ab]&&bc]", "c");
+  x2("[a-z&&b-y&&c-x]", "w", 0, 1);
+  n("[^a-z&&b-y&&c-x]", "w");
+  x2("[[^a&&a]&&a-z]", "b", 0, 1);
+  n("[[^a&&a]&&a-z]", "a");
+  x2("[[^a-z&&bcdef]&&[^c-g]]", "h", 0, 1);
+  n("[[^a-z&&bcdef]&&[^c-g]]", "c");
+  x2("[^[^abc]&&[^cde]]", "c", 0, 1);
+  x2("[^[^abc]&&[^cde]]", "e", 0, 1);
+  n("[^[^abc]&&[^cde]]", "f");
+  x2("[a-&&-a]", "-", 0, 1);
+  n("[a\\-&&\\-a]", "&");
+  n("\\wabc", " abc");
+  x2("a\\Wbc", "a bc", 0, 4);
+  x2("a.b.c", "aabbc", 0, 5);
+  x2(".\\wb\\W..c", "abb bcc", 0, 7);
+  x2("\\s\\wzzz", " zzzz", 0, 5);
+  x2("aa.b", "aabb", 0, 4);
+  n(".a", "ab");
+  x2(".a", "aa", 0, 2);
+  x2("^a", "a", 0, 1);
+  x2("^a$", "a", 0, 1);
+  x2("^\\w$", "a", 0, 1);
+  n("^\\w$", " ");
+  x2("^\\wab$", "zab", 0, 3);
+  x2("^\\wabcdef$", "zabcdef", 0, 7);
+  x2("^\\w...def$", "zabcdef", 0, 7);
+  x2("\\w\\w\\s\\Waaa\\d", "aa  aaa4", 0, 8);
+  x2("\\A\\Z", "", 0, 0);
+  x2("\\Axyz", "xyz", 0, 3);
+  x2("xyz\\Z", "xyz", 0, 3);
+  x2("xyz\\z", "xyz", 0, 3);
+  x2("a\\Z", "a", 0, 1);
+  x2("\\Gaz", "az", 0, 2);
+  n("\\Gz", "bza");
+  n("az\\G", "az");
+  n("az\\A", "az");
+  n("a\\Az", "az");
+  x2("\\^\\$", "^$", 0, 2);
+  x2("^x?y", "xy", 0, 2);
+  x2("^(x?y)", "xy", 0, 2);
+  x2("\\w", "_", 0, 1);
+  n("\\W", "_");
+  x2("(?=z)z", "z", 0, 1);
+  n("(?=z).", "a");
+  x2("(?!z)a", "a", 0, 1);
+  n("(?!z)a", "z");
+  x2("(?i:a)", "a", 0, 1);
+  x2("(?i:a)", "A", 0, 1);
+  x2("(?i:A)", "a", 0, 1);
+  n("(?i:A)", "b");
+  x2("(?i:[A-Z])", "a", 0, 1);
+  x2("(?i:[f-m])", "H", 0, 1);
+  x2("(?i:[f-m])", "h", 0, 1);
+  n("(?i:[f-m])", "e");
+  x2("(?i:[A-c])", "D", 0, 1);
+  n("(?i:[^a-z])", "A");
+  n("(?i:[^a-z])", "a");
+  x2("(?i:[!-k])", "Z", 0, 1);
+  x2("(?i:[!-k])", "7", 0, 1);
+  x2("(?i:[T-}])", "b", 0, 1);
+  x2("(?i:[T-}])", "{", 0, 1);
+  x2("(?i:\\?a)", "?A", 0, 2);
+  x2("(?i:\\*A)", "*a", 0, 2);
+  n(".", "\n");
+  x2("(?m:.)", "\n", 0, 1);
+  x2("(?m:a.)", "a\n", 0, 2);
+  x2("(?m:.b)", "a\nb", 1, 3);
+  x2(".*abc", "dddabdd\nddabc", 8, 13);
+  x2("(?m:.*abc)", "dddabddabc", 0, 10);
+  n("(?i)(?-i)a", "A");
+  n("(?i)(?-i:a)", "A");
+  x2("a?", "", 0, 0);
+  x2("a?", "b", 0, 0);
+  x2("a?", "a", 0, 1);
+  x2("a*", "", 0, 0);
+  x2("a*", "a", 0, 1);
+  x2("a*", "aaa", 0, 3);
+  x2("a*", "baaaa", 0, 0);
+  n("a+", "");
+  x2("a+", "a", 0, 1);
+  x2("a+", "aaaa", 0, 4);
+  x2("a+", "aabbb", 0, 2);
+  x2("a+", "baaaa", 1, 5);
+  x2(".?", "", 0, 0);
+  x2(".?", "f", 0, 1);
+  x2(".?", "\n", 0, 0);
+  x2(".*", "", 0, 0);
+  x2(".*", "abcde", 0, 5);
+  x2(".+", "z", 0, 1);
+  x2(".+", "zdswer\n", 0, 6);
+  x2("(.*)a\\1f", "babfbac", 0, 4);
+  x2("(.*)a\\1f", "bacbabf", 3, 7);
+  x2("((.*)a\\2f)", "bacbabf", 3, 7);
+  x2("(.*)a\\1f", "baczzzzzz\nbazz\nzzzzbabf", 19, 23);
+  x2("a|b", "a", 0, 1);
+  x2("a|b", "b", 0, 1);
+  x2("|a", "a", 0, 0);
+  x2("(|a)", "a", 0, 0);
+  x2("ab|bc", "ab", 0, 2);
+  x2("ab|bc", "bc", 0, 2);
+  x2("z(?:ab|bc)", "zbc", 0, 3);
+  x2("a(?:ab|bc)c", "aabc", 0, 4);
+  x2("ab|(?:ac|az)", "az", 0, 2);
+  x2("a|b|c", "dc", 1, 2);
+  x2("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "pqr", 0, 2);
+  n("a|b|cd|efg|h|ijk|lmn|o|pq|rstuvwx|yz", "mn");
+  x2("a|^z", "ba", 1, 2);
+  x2("a|^z", "za", 0, 1);
+  x2("a|\\Gz", "bza", 2, 3);
+  x2("a|\\Gz", "za", 0, 1);
+  x2("a|\\Az", "bza", 2, 3);
+  x2("a|\\Az", "za", 0, 1);
+  x2("a|b\\Z", "ba", 1, 2);
+  x2("a|b\\Z", "b", 0, 1);
+  x2("a|b\\z", "ba", 1, 2);
+  x2("a|b\\z", "b", 0, 1);
+  x2("\\w|\\s", " ", 0, 1);
+  n("\\w|\\w", " ");
+  x2("\\w|%", "%", 0, 1);
+  x2("\\w|[&$]", "&", 0, 1);
+  x2("[b-d]|[^e-z]", "a", 0, 1);
+  x2("(?:a|[c-f])|bz", "dz", 0, 1);
+  x2("(?:a|[c-f])|bz", "bz", 0, 2);
+  x2("abc|(?=zz)..f", "zzf", 0, 3);
+  x2("abc|(?!zz)..f", "abf", 0, 3);
+  x2("(?=za)..a|(?=zz)..a", "zza", 0, 3);
+  n("(?>a|abd)c", "abdc");
+  x2("(?>abd|a)c", "abdc", 0, 4);
+  x2("a?|b", "a", 0, 1);
+  x2("a?|b", "b", 0, 0);
+  x2("a?|b", "", 0, 0);
+  x2("a*|b", "aa", 0, 2);
+  x2("a*|b*", "ba", 0, 0);
+  x2("a*|b*", "ab", 0, 1);
+  x2("a+|b*", "", 0, 0);
+  x2("a+|b*", "bbb", 0, 3);
+  x2("a+|b*", "abbb", 0, 1);
+  n("a+|b+", "");
+  x2("(a|b)?", "b", 0, 1);
+  x2("(a|b)*", "ba", 0, 2);
+  x2("(a|b)+", "bab", 0, 3);
+  x2("(ab|ca)+", "caabbc", 0, 4);
+  x2("(ab|ca)+", "aabca", 1, 5);
+  x2("(ab|ca)+", "abzca", 0, 2);
+  x2("(a|bab)+", "ababa", 0, 5);
+  x2("(a|bab)+", "ba", 1, 2);
+  x2("(a|bab)+", "baaaba", 1, 4);
+  x2("(?:a|b)(?:a|b)", "ab", 0, 2);
+  x2("(?:a*|b*)(?:a*|b*)", "aaabbb", 0, 3);
+  x2("(?:a*|b*)(?:a+|b+)", "aaabbb", 0, 6);
+  x2("(?:a+|b+){2}", "aaabbb", 0, 6);
+  x2("h{0,}", "hhhh", 0, 4);
+  x2("(?:a+|b+){1,2}", "aaabbb", 0, 6);
+  n("ax{2}*a", "0axxxa1");
+  n("a.{0,2}a", "0aXXXa0");
+  n("a.{0,2}?a", "0aXXXa0");
+  n("a.{0,2}?a", "0aXXXXa0");
+  x2("^a{2,}?a$", "aaa", 0, 3);
+  x2("^[a-z]{2,}?$", "aaa", 0, 3);
+  x2("(?:a+|\\Ab*)cc", "cc", 0, 2);
+  n("(?:a+|\\Ab*)cc", "abcc");
+  x2("(?:^a+|b+)*c", "aabbbabc", 6, 8);
+  x2("(?:^a+|b+)*c", "aabbbbc", 0, 7);
+  x2("a|(?i)c", "C", 0, 1);
+  x2("(?i)c|a", "C", 0, 1);
+  x2("(?i)c|a", "A", 0, 1);
+  x2("(?i:c)|a", "C", 0, 1);
+  n("(?i:c)|a", "A");
+  x2("[abc]?", "abc", 0, 1);
+  x2("[abc]*", "abc", 0, 3);
+  x2("[^abc]*", "abc", 0, 0);
+  n("[^abc]+", "abc");
+  x2("a?\?", "aaa", 0, 0);
+  x2("ba?\?b", "bab", 0, 3);
+  x2("a*?", "aaa", 0, 0);
+  x2("ba*?", "baa", 0, 1);
+  x2("ba*?b", "baab", 0, 4);
+  x2("a+?", "aaa", 0, 1);
+  x2("ba+?", "baa", 0, 2);
+  x2("ba+?b", "baab", 0, 4);
+  x2("(?:a?)?\?", "a", 0, 0);
+  x2("(?:a?\?)?", "a", 0, 0);
+  x2("(?:a?)+?", "aaa", 0, 1);
+  x2("(?:a+)?\?", "aaa", 0, 0);
+  x2("(?:a+)?\?b", "aaab", 0, 4);
+  x2("(?:ab)?{2}", "", 0, 0);
+  x2("(?:ab)?{2}", "ababa", 0, 4);
+  x2("(?:ab)*{0}", "ababa", 0, 0);
+  x2("(?:ab){3,}", "abababab", 0, 8);
+  n("(?:ab){3,}", "abab");
+  x2("(?:ab){2,4}", "ababab", 0, 6);
+  x2("(?:ab){2,4}", "ababababab", 0, 8);
+  x2("(?:ab){2,4}?", "ababababab", 0, 4);
+  x2("(?:ab){,}", "ab{,}", 0, 5);
+  x2("(?:abc)+?{2}", "abcabcabc", 0, 6);
+  x2("(?:X*)(?i:xa)", "XXXa", 0, 4);
+  x2("(d+)([^abc]z)", "dddz", 0, 4);
+  x2("([^abc]*)([^abc]z)", "dddz", 0, 4);
+  x2("(\\w+)(\\wz)", "dddz", 0, 4);
+  x3("(a)", "a", 0, 1, 1);
+  x3("(ab)", "ab", 0, 2, 1);
+  x2("((ab))", "ab", 0, 2);
+  x3("((ab))", "ab", 0, 2, 1);
+  x3("((ab))", "ab", 0, 2, 2);
+  x3("((((((((((((((((((((ab))))))))))))))))))))", "ab", 0, 2, 20);
+  x3("(ab)(cd)", "abcd", 0, 2, 1);
+  x3("(ab)(cd)", "abcd", 2, 4, 2);
+  x3("()(a)bc(def)ghijk", "abcdefghijk", 3, 6, 3);
+  x3("(()(a)bc(def)ghijk)", "abcdefghijk", 3, 6, 4);
+  x2("(^a)", "a", 0, 1);
+  x3("(a)|(a)", "ba", 1, 2, 1);
+  x3("(^a)|(a)", "ba", 1, 2, 2);
+  x3("(a?)", "aaa", 0, 1, 1);
+  x3("(a*)", "aaa", 0, 3, 1);
+  x3("(a*)", "", 0, 0, 1);
+  x3("(a+)", "aaaaaaa", 0, 7, 1);
+  x3("(a+|b*)", "bbbaa", 0, 3, 1);
+  x3("(a+|b?)", "bbbaa", 0, 1, 1);
+  x3("(abc)?", "abc", 0, 3, 1);
+  x3("(abc)*", "abc", 0, 3, 1);
+  x3("(abc)+", "abc", 0, 3, 1);
+  x3("(xyz|abc)+", "abc", 0, 3, 1);
+  x3("([xyz][abc]|abc)+", "abc", 0, 3, 1);
+  x3("((?i:abc))", "AbC", 0, 3, 1);
+  x2("(abc)(?i:\\1)", "abcABC", 0, 6);
+  x3("((?m:a.c))", "a\nc", 0, 3, 1);
+  x3("((?=az)a)", "azb", 0, 1, 1);
+  x3("abc|(.abd)", "zabd", 0, 4, 1);
+  x2("(?:abc)|(ABC)", "abc", 0, 3);
+  x3("(?i:(abc))|(zzz)", "ABC", 0, 3, 1);
+  x3("a*(.)", "aaaaz", 4, 5, 1);
+  x3("a*?(.)", "aaaaz", 0, 1, 1);
+  x3("a*?(c)", "aaaac", 4, 5, 1);
+  x3("[bcd]a*(.)", "caaaaz", 5, 6, 1);
+  x3("(\\Abb)cc", "bbcc", 0, 2, 1);
+  n("(\\Abb)cc", "zbbcc");
+  x3("(^bb)cc", "bbcc", 0, 2, 1);
+  n("(^bb)cc", "zbbcc");
+  x3("cc(bb$)", "ccbb", 2, 4, 1);
+  n("cc(bb$)", "ccbbb");
+  n("(\\1)", "");
+  n("\\1(a)", "aa");
+  n("(a(b)\\1)\\2+", "ababb");
+  n("(?:(?:\\1|z)(a))+$", "zaa");
+  x2("(?:(?:\\1|z)(a))+$", "zaaa", 0, 4);
+  x2("(a)(?=\\1)", "aa", 0, 1);
+  n("(a)$|\\1", "az");
+  x2("(a)\\1", "aa", 0, 2);
+  n("(a)\\1", "ab");
+  x2("(a?)\\1", "aa", 0, 2);
+  x2("(a?\?)\\1", "aa", 0, 0);
+  x2("(a*)\\1", "aaaaa", 0, 4);
+  x3("(a*)\\1", "aaaaa", 0, 2, 1);
+  x2("a(b*)\\1", "abbbb", 0, 5);
+  x2("a(b*)\\1", "ab", 0, 1);
+  x2("(a*)(b*)\\1\\2", "aaabbaaabb", 0, 10);
+  x2("(a*)(b*)\\2", "aaabbbb", 0, 7);
+  x2("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 8);
+  x3("(((((((a*)b))))))c\\7", "aaabcaaa", 0, 3, 7);
+  x2("(a)(b)(c)\\2\\1\\3", "abcbac", 0, 6);
+  x2("([a-d])\\1", "cc", 0, 2);
+  x2("(\\w\\d\\s)\\1", "f5 f5 ", 0, 6);
+  n("(\\w\\d\\s)\\1", "f5 f5");
+  x2("(who|[a-c]{3})\\1", "whowho", 0, 6);
+  x2("...(who|[a-c]{3})\\1", "abcwhowho", 0, 9);
+  x2("(who|[a-c]{3})\\1", "cbccbc", 0, 6);
+  x2("(^a)\\1", "aa", 0, 2);
+  n("(^a)\\1", "baa");
+  n("(a$)\\1", "aa");
+  n("(ab\\Z)\\1", "ab");
+  x2("(a*\\Z)\\1", "a", 1, 1);
+  x2(".(a*\\Z)\\1", "ba", 1, 2);
+  x3("(.(abc)\\2)", "zabcabc", 0, 7, 1);
+  x3("(.(..\\d.)\\2)", "z12341234", 0, 9, 1);
+  x2("((?i:az))\\1", "AzAz", 0, 4);
+  n("((?i:az))\\1", "Azaz");
+  x2("(?<=a)b", "ab", 1, 2);
+  n("(?<=a)b", "bb");
+  x2("(?<=a|b)b", "bb", 1, 2);
+  x2("(?<=a|bc)b", "bcb", 2, 3);
+  x2("(?<=a|bc)b", "ab", 1, 2);
+  x2("(?<=a|bc||defghij|klmnopq|r)z", "rz", 1, 2);
+  x2("(a)\\g<1>", "aa", 0, 2);
+  x2("(?<!a)b", "cb", 1, 2);
+  n("(?<!a)b", "ab");
+  x2("(?<!a|bc)b", "bbb", 0, 1);
+  n("(?<!a|bc)z", "bcz");
+  x2("(?<name1>a)", "a", 0, 1);
+  x2("(?<name_2>ab)\\g<name_2>", "abab", 0, 4);
+  x2("(?<name_3>.zv.)\\k<name_3>", "azvbazvb", 0, 8);
+  x2("(?<=\\g<ab>)|-\\zEND (?<ab>XyZ)", "XyZ", 3, 3);
+  x2("(?<n>|a\\g<n>)+", "", 0, 0);
+  x2("(?<n>|\\(\\g<n>\\))+$", "()(())", 0, 6);
+  x3("\\g<n>(?<n>.){0}", "X", 0, 1, 1);
+  x2("\\g<n>(abc|df(?<n>.YZ){2,8}){0}", "XYZ", 0, 3);
+  x2("\\A(?<n>(a\\g<n>)|)\\z", "aaaa", 0, 4);
+  x2("(?<n>|\\g<m>\\g<n>)\\z|\\zEND (?<m>a|(b)\\g<m>)", "bbbbabba", 0, 8);
+  x2("(?<name1240>\\w+\\sx)a+\\k<name1240>", "  fg xaaaaaaaafg x", 2, 18);
+  x3("(z)()()(?<_9>a)\\g<_9>", "zaa", 2, 3, 1);
+  x2("(.)(((?<_>a)))\\k<_>", "zaa", 0, 3);
+  x2("((?<name1>\\d)|(?<name2>\\w))(\\k<name1>|\\k<name2>)", "ff", 0, 2);
+  x2("(?:(?<x>)|(?<x>efg))\\k<x>", "", 0, 0);
+  x2("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefgefg", 3, 9);
+  n("(?:(?<x>abc)|(?<x>efg))\\k<x>", "abcefg");
+  x2("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "a-pyumpyum", 2, 10);
+  x3("(?:(?<n1>.)|(?<n1>..)|(?<n1>...)|(?<n1>....)|(?<n1>.....)|(?<n1>......)|(?<n1>.......)|(?<n1>........)|(?<n1>.........)|(?<n1>..........)|(?<n1>...........)|(?<n1>............)|(?<n1>.............)|(?<n1>..............))\\k<n1>$", "xxxxabcdefghijklmnabcdefghijklmn", 4, 18, 14);
+  x3("(?<name1>)(?<name2>)(?<name3>)(?<name4>)(?<name5>)(?<name6>)(?<name7>)(?<name8>)(?<name9>)(?<name10>)(?<name11>)(?<name12>)(?<name13>)(?<name14>)(?<name15>)(?<name16>aaa)(?<name17>)$", "aaa", 0, 3, 16);
+  x2("(?<foo>a|\\(\\g<foo>\\))", "a", 0, 1);
+  x2("(?<foo>a|\\(\\g<foo>\\))", "((((((a))))))", 0, 13);
+  x3("(?<foo>a|\\(\\g<foo>\\))", "((((((((a))))))))", 0, 17, 1);
+  x2("\\g<bar>|\\zEND(?<bar>.*abc$)", "abcxxxabc", 0, 9);
+  x2("\\g<1>|\\zEND(.a.)", "bac", 0, 3);
+  x3("\\g<_A>\\g<_A>|\\zEND(.a.)(?<_A>.b.)", "xbxyby", 3, 6, 1);
+  x2("\\A(?:\\g<pon>|\\g<pan>|\\zEND  (?<pan>a|c\\g<pon>c)(?<pon>b|d\\g<pan>d))$", "cdcbcdc", 0, 7);
+  x2("\\A(?<n>|a\\g<m>)\\z|\\zEND (?<m>\\g<n>)", "aaaa", 0, 4);
+  x2("(?<n>(a|b\\g<n>c){3,5})", "baaaaca", 1, 5);
+  x2("(?<n>(a|b\\g<n>c){3,5})", "baaaacaaaaa", 0, 10);
+  x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
+  x2("()*\\1", "", 0, 0);
+  x2("(?:()|())*\\1\\2", "", 0, 0);
+  x3("(?:\\1a|())*", "a", 0, 0, 1);
+  x2("x((.)*)*x", "0x1x2x3", 1, 6);
+  x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
+  x2("(?:()|()|()|()|()|())*\\2\\5", "", 0, 0);
+  x2("(?:()|()|()|(x)|()|())*\\2b\\5", "b", 0, 1);
+  x2("\\xED\\xF2", "\xed\xf2", 0, 2);
+  x2("", "\82 ", 0, 0);
+  x2("\82 ", "\82 ", 0, 2);
+  n("\82¢", "\82 ");
+  x2("\82¤\82¤", "\82¤\82¤", 0, 4);
+  x2("\82 \82¢\82¤", "\82 \82¢\82¤", 0, 6);
+  x2("\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±", "\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±\82±", 0, 70);
+  x2("\82 ", "\82¢\82 ", 2, 4);
+  x2("\82¢\82¤", "\82 \82¢\82¤", 2, 6);
+  x2("\\xca\\xb8", "\xca\xb8", 0, 2);
+  x2(".", "\82 ", 0, 2);
+  x2("..", "\82©\82«", 0, 4);
+  x2("\\w", "\82¨", 0, 2);
+  n("\\W", "\82 ");
+  x2("[\\W]", "\82¤$", 2, 3);
+  x2("\\S", "\82»", 0, 2);
+  x2("\\S", "\8a¿", 0, 2);
+  x2("\\b", "\8bC ", 0, 0);
+  x2("\\b", " \82Ù", 1, 1);
+  x2("\\B", "\82¹\82» ", 2, 2);
+  x2("\\B", "\82¤ ", 3, 3);
+  x2("\\B", " \82¢", 0, 0);
+  x2("[\82½\82¿]", "\82¿", 0, 2);
+  n("[\82È\82É]", "\82Ê");
+  x2("[\82¤-\82¨]", "\82¦", 0, 2);
+  n("[^\82¯]", "\82¯");
+  x2("[\\w]", "\82Ë", 0, 2);
+  n("[\\d]", "\82Ó");
+  x2("[\\D]", "\82Í", 0, 2);
+  n("[\\s]", "\82­");
+  x2("[\\S]", "\82Ö", 0, 2);
+  x2("[\\w\\d]", "\82æ", 0, 2);
+  x2("[\\w\\d]", "   \82æ", 3, 5);
+  n("\\w\8bS\8eÔ", " \8bS\8eÔ");
+  x2("\8bS\\W\8eÔ", "\8b\8eÔ", 0, 5);
+  x2("\82 .\82¢.\82¤", "\82 \82 \82¢\82¢\82¤", 0, 10);
+  x2(".\\w\82¤\\W..\82¼", "\82¦\82¤\82¤ \82¤\82¼\82¼", 0, 13);
+  x2("\\s\\w\82±\82±\82±", " \82±\82±\82±\82±", 0, 9);
+  x2("\82 \82 .\82¯", "\82 \82 \82¯\82¯", 0, 8);
+  n(".\82¢", "\82¢\82¦");
+  x2(".\82¨", "\82¨\82¨", 0, 4);
+  x2("^\82 ", "\82 ", 0, 2);
+  x2("^\82Þ$", "\82Þ", 0, 2);
+  x2("^\\w$", "\82É", 0, 2);
+  x2("^\\w\82©\82«\82­\82¯\82±$", "z\82©\82«\82­\82¯\82±", 0, 11);
+  x2("^\\w...\82¤\82¦\82¨$", "z\82 \82¢\82¤\82¤\82¦\82¨", 0, 13);
+  x2("\\w\\w\\s\\W\82¨\82¨\82¨\\d", "a\82¨  \82¨\82¨\82¨4", 0, 12);
+  x2("\\A\82½\82¿\82Â", "\82½\82¿\82Â", 0, 6);
+  x2("\82Þ\82ß\82à\\Z", "\82Þ\82ß\82à", 0, 6);
+  x2("\82©\82«\82­\\z", "\82©\82«\82­", 0, 6);
+  x2("\82©\82«\82­\\Z", "\82©\82«\82­\n", 0, 6);
+  x2("\\G\82Û\82Ò", "\82Û\82Ò", 0, 4);
+  n("\\G\82¦", "\82¤\82¦\82¨");
+  n("\82Æ\82Ä\\G", "\82Æ\82Ä");
+  n("\82Ü\82Ý\\A", "\82Ü\82Ý");
+  n("\82Ü\\A\82Ý", "\82Ü\82Ý");
+  x2("(?=\82¹)\82¹", "\82¹", 0, 2);
+  n("(?=\82¤).", "\82¢");
+  x2("(?!\82¤)\82©", "\82©", 0, 2);
+  n("(?!\82Æ)\82 ", "\82Æ");
+  x2("(?i:\82 )", "\82 ", 0, 2);
+  x2("(?i:\82Ô\82×)", "\82Ô\82×", 0, 4);
+  n("(?i:\82¢)", "\82¤");
+  x2("(?m:\82æ.)", "\82æ\n", 0, 3);
+  x2("(?m:.\82ß)", "\82Ü\n\82ß", 2, 5);
+  x2("\82 ?", "", 0, 0);
+  x2("\95Ï?", "\89»", 0, 0);
+  x2("\95Ï?", "\95Ï", 0, 2);
+  x2("\97Ê*", "", 0, 0);
+  x2("\97Ê*", "\97Ê", 0, 2);
+  x2("\8eq*", "\8eq\8eq\8eq", 0, 6);
+  x2("\94n*", "\8e­\94n\94n\94n\94n", 0, 0);
+  n("\8eR+", "");
+  x2("\89Í+", "\89Í", 0, 2);
+  x2("\8e\9e+", "\8e\9e\8e\9e\8e\9e\8e\9e", 0, 8);
+  x2("\82¦+", "\82¦\82¦\82¤\82¤\82¤", 0, 4);
+  x2("\82¤+", "\82¨\82¤\82¤\82¤\82¤", 2, 10);
+  x2(".?", "\82½", 0, 2);
+  x2(".*", "\82Ï\82Ò\82Õ\82Ø", 0, 8);
+  x2(".+", "\82ë", 0, 2);
+  x2(".+", "\82¢\82¤\82¦\82©\n", 0, 8);
+  x2("\82 |\82¢", "\82 ", 0, 2);
+  x2("\82 |\82¢", "\82¢", 0, 2);
+  x2("\82 \82¢|\82¢\82¤", "\82 \82¢", 0, 4);
+  x2("\82 \82¢|\82¢\82¤", "\82¢\82¤", 0, 4);
+  x2("\82ð(?:\82©\82«|\82«\82­)", "\82ð\82©\82«", 0, 6);
+  x2("\82ð(?:\82©\82«|\82«\82­)\82¯", "\82ð\82«\82­\82¯", 0, 8);
+  x2("\82 \82¢|(?:\82 \82¤|\82 \82ð)", "\82 \82ð", 0, 4);
+  x2("\82 |\82¢|\82¤", "\82¦\82¤", 2, 4);
+  x2("\82 |\82¢|\82¤\82¦|\82¨\82©\82«|\82­|\82¯\82±\82³|\82µ\82·\82¹|\82»|\82½\82¿|\82Â\82Ä\82Æ\82È\82É|\82Ê\82Ë", "\82µ\82·\82¹", 0, 6);
+  n("\82 |\82¢|\82¤\82¦|\82¨\82©\82«|\82­|\82¯\82±\82³|\82µ\82·\82¹|\82»|\82½\82¿|\82Â\82Ä\82Æ\82È\82É|\82Ê\82Ë", "\82·\82¹");
+  x2("\82 |^\82í", "\82Ô\82 ", 2, 4);
+  x2("\82 |^\82ð", "\82ð\82 ", 0, 2);
+  x2("\8bS|\\G\8eÔ", "\82¯\8eÔ\8bS", 4, 6);
+  x2("\8bS|\\G\8eÔ", "\8eÔ\8bS", 0, 2);
+  x2("\8bS|\\A\8eÔ", "b\8eÔ\8bS", 3, 5);
+  x2("\8bS|\\A\8eÔ", "\8eÔ", 0, 2);
+  x2("\8bS|\8eÔ\\Z", "\8eÔ\8bS", 2, 4);
+  x2("\8bS|\8eÔ\\Z", "\8eÔ", 0, 2);
+  x2("\8bS|\8eÔ\\Z", "\8eÔ\n", 0, 2);
+  x2("\8bS|\8eÔ\\z", "\8eÔ\8bS", 2, 4);
+  x2("\8bS|\8eÔ\\z", "\8eÔ", 0, 2);
+  x2("\\w|\\s", "\82¨", 0, 2);
+  x2("\\w|%", "%\82¨", 0, 1);
+  x2("\\w|[&$]", "\82¤&", 0, 2);
+  x2("[\82¢-\82¯]", "\82¤", 0, 2);
+  x2("[\82¢-\82¯]|[^\82©-\82±]", "\82 ", 0, 2);
+  x2("[\82¢-\82¯]|[^\82©-\82±]", "\82©", 0, 2);
+  x2("[^\82 ]", "\n", 0, 1);
+  x2("(?:\82 |[\82¤-\82«])|\82¢\82ð", "\82¤\82ð", 0, 2);
+  x2("(?:\82 |[\82¤-\82«])|\82¢\82ð", "\82¢\82ð", 0, 4);
+  x2("\82 \82¢\82¤|(?=\82¯\82¯)..\82Ù", "\82¯\82¯\82Ù", 0, 6);
+  x2("\82 \82¢\82¤|(?!\82¯\82¯)..\82Ù", "\82 \82¢\82Ù", 0, 6);
+  x2("(?=\82ð\82 )..\82 |(?=\82ð\82ð)..\82 ", "\82ð\82ð\82 ", 0, 6);
+  x2("(?<=\82 |\82¢\82¤)\82¢", "\82¢\82¤\82¢", 4, 6);
+  n("(?>\82 |\82 \82¢\82¦)\82¤", "\82 \82¢\82¦\82¤");
+  x2("(?>\82 \82¢\82¦|\82 )\82¤", "\82 \82¢\82¦\82¤", 0, 8);
+  x2("\82 ?|\82¢", "\82 ", 0, 2);
+  x2("\82 ?|\82¢", "\82¢", 0, 0);
+  x2("\82 ?|\82¢", "", 0, 0);
+  x2("\82 *|\82¢", "\82 \82 ", 0, 4);
+  x2("\82 *|\82¢*", "\82¢\82 ", 0, 0);
+  x2("\82 *|\82¢*", "\82 \82¢", 0, 2);
+  x2("[a\82 ]*|\82¢*", "a\82 \82¢\82¢\82¢", 0, 3);
+  x2("\82 +|\82¢*", "", 0, 0);
+  x2("\82 +|\82¢*", "\82¢\82¢\82¢", 0, 6);
+  x2("\82 +|\82¢*", "\82 \82¢\82¢\82¢", 0, 2);
+  x2("\82 +|\82¢*", "a\82 \82¢\82¢\82¢", 0, 0);
+  n("\82 +|\82¢+", "");
+  x2("(\82 |\82¢)?", "\82¢", 0, 2);
+  x2("(\82 |\82¢)*", "\82¢\82 ", 0, 4);
+  x2("(\82 |\82¢)+", "\82¢\82 \82¢", 0, 6);
+  x2("(\82 \82¢|\82¤\82 )+", "\82¤\82 \82 \82¢\82¤\82¦", 0, 8);
+  x2("(\82 \82¢|\82¤\82¦)+", "\82¤\82 \82 \82¢\82¤\82¦", 4, 12);
+  x2("(\82 \82¢|\82¤\82 )+", "\82 \82 \82¢\82¤\82 ", 2, 10);
+  x2("(\82 \82¢|\82¤\82 )+", "\82 \82¢\82ð\82¤\82 ", 0, 4);
+  x2("(\82 \82¢|\82¤\82 )+", "$$zzzz\82 \82¢\82ð\82¤\82 ", 6, 10);
+  x2("(\82 |\82¢\82 \82¢)+", "\82 \82¢\82 \82¢\82 ", 0, 10);
+  x2("(\82 |\82¢\82 \82¢)+", "\82¢\82 ", 2, 4);
+  x2("(\82 |\82¢\82 \82¢)+", "\82¢\82 \82 \82 \82¢\82 ", 2, 8);
+  x2("(?:\82 |\82¢)(?:\82 |\82¢)", "\82 \82¢", 0, 4);
+  x2("(?:\82 *|\82¢*)(?:\82 *|\82¢*)", "\82 \82 \82 \82¢\82¢\82¢", 0, 6);
+  x2("(?:\82 *|\82¢*)(?:\82 +|\82¢+)", "\82 \82 \82 \82¢\82¢\82¢", 0, 12);
+  x2("(?:\82 +|\82¢+){2}", "\82 \82 \82 \82¢\82¢\82¢", 0, 12);
+  x2("(?:\82 +|\82¢+){1,2}", "\82 \82 \82 \82¢\82¢\82¢", 0, 12);
+  x2("(?:\82 +|\\A\82¢*)\82¤\82¤", "\82¤\82¤", 0, 4);
+  n("(?:\82 +|\\A\82¢*)\82¤\82¤", "\82 \82¢\82¤\82¤");
+  x2("(?:^\82 +|\82¢+)*\82¤", "\82 \82 \82¢\82¢\82¢\82 \82¢\82¤", 12, 16);
+  x2("(?:^\82 +|\82¢+)*\82¤", "\82 \82 \82¢\82¢\82¢\82¢\82¤", 0, 14);
+  x2("\82¤{0,}", "\82¤\82¤\82¤\82¤", 0, 8);
+  x2("\82 |(?i)c", "C", 0, 1);
+  x2("(?i)c|\82 ", "C", 0, 1);
+  x2("(?i:\82 )|a", "a", 0, 1);
+  n("(?i:\82 )|a", "A");
+  x2("[\82 \82¢\82¤]?", "\82 \82¢\82¤", 0, 2);
+  x2("[\82 \82¢\82¤]*", "\82 \82¢\82¤", 0, 6);
+  x2("[^\82 \82¢\82¤]*", "\82 \82¢\82¤", 0, 0);
+  n("[^\82 \82¢\82¤]+", "\82 \82¢\82¤");
+  x2("\82 ?\?", "\82 \82 \82 ", 0, 0);
+  x2("\82¢\82 ?\?\82¢", "\82¢\82 \82¢", 0, 6);
+  x2("\82 *?", "\82 \82 \82 ", 0, 0);
+  x2("\82¢\82 *?", "\82¢\82 \82 ", 0, 2);
+  x2("\82¢\82 *?\82¢", "\82¢\82 \82 \82¢", 0, 8);
+  x2("\82 +?", "\82 \82 \82 ", 0, 2);
+  x2("\82¢\82 +?", "\82¢\82 \82 ", 0, 4);
+  x2("\82¢\82 +?\82¢", "\82¢\82 \82 \82¢", 0, 8);
+  x2("(?:\93V?)?\?", "\93V", 0, 0);
+  x2("(?:\93V?\?)?", "\93V", 0, 0);
+  x2("(?:\96²?)+?", "\96²\96²\96²", 0, 2);
+  x2("(?:\95\97+)?\?", "\95\97\95\97\95\97", 0, 0);
+  x2("(?:\90á+)?\?\91\9a", "\90á\90á\90á\91\9a", 0, 8);
+  x2("(?:\82 \82¢)?{2}", "", 0, 0);
+  x2("(?:\8bS\8eÔ)?{2}", "\8bS\8eÔ\8bS\8eÔ\8bS", 0, 8);
+  x2("(?:\8bS\8eÔ)*{0}", "\8bS\8eÔ\8bS\8eÔ\8bS", 0, 0);
+  x2("(?:\8bS\8eÔ){3,}", "\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ", 0, 16);
+  n("(?:\8bS\8eÔ){3,}", "\8bS\8eÔ\8bS\8eÔ");
+  x2("(?:\8bS\8eÔ){2,4}", "\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ", 0, 12);
+  x2("(?:\8bS\8eÔ){2,4}", "\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ", 0, 16);
+  x2("(?:\8bS\8eÔ){2,4}?", "\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ\8bS\8eÔ", 0, 8);
+  x2("(?:\8bS\8eÔ){,}", "\8bS\8eÔ{,}", 0, 7);
+  x2("(?:\82©\82«\82­)+?{2}", "\82©\82«\82­\82©\82«\82­\82©\82«\82­", 0, 12);
+  x3("(\89Î)", "\89Î", 0, 2, 1);
+  x3("(\89Î\90\85)", "\89Î\90\85", 0, 4, 1);
+  x2("((\8e\9e\8aÔ))", "\8e\9e\8aÔ", 0, 4);
+  x3("((\95\97\90\85))", "\95\97\90\85", 0, 4, 1);
+  x3("((\8dð\93ú))", "\8dð\93ú", 0, 4, 2);
+  x3("((((((((((((((((((((\97Ê\8eq))))))))))))))))))))", "\97Ê\8eq", 0, 4, 20);
+  x3("(\82 \82¢)(\82¤\82¦)", "\82 \82¢\82¤\82¦", 0, 4, 1);
+  x3("(\82 \82¢)(\82¤\82¦)", "\82 \82¢\82¤\82¦", 4, 8, 2);
+  x3("()(\82 )\82¢\82¤(\82¦\82¨\82©)\82«\82­\82¯\82±", "\82 \82¢\82¤\82¦\82¨\82©\82«\82­\82¯\82±", 6, 12, 3);
+  x3("(()(\82 )\82¢\82¤(\82¦\82¨\82©)\82«\82­\82¯\82±)", "\82 \82¢\82¤\82¦\82¨\82©\82«\82­\82¯\82±", 6, 12, 4);
+  x3(".*(\83t\83H)\83\93\81E\83}(\83\93()\83V\83\85\83^)\83C\83\93", "\83t\83H\83\93\81E\83}\83\93\83V\83\85\83^\83C\83\93", 10, 18, 2);
+  x2("(^\82 )", "\82 ", 0, 2);
+  x3("(\82 )|(\82 )", "\82¢\82 ", 2, 4, 1);
+  x3("(^\82 )|(\82 )", "\82¢\82 ", 2, 4, 2);
+  x3("(\82 ?)", "\82 \82 \82 ", 0, 2, 1);
+  x3("(\82Ü*)", "\82Ü\82Ü\82Ü", 0, 6, 1);
+  x3("(\82Æ*)", "", 0, 0, 1);
+  x3("(\82é+)", "\82é\82é\82é\82é\82é\82é\82é", 0, 14, 1);
+  x3("(\82Ó+|\82Ö*)", "\82Ó\82Ó\82Ó\82Ö\82Ö", 0, 6, 1);
+  x3("(\82 +|\82¢?)", "\82¢\82¢\82¢\82 \82 ", 0, 2, 1);
+  x3("(\82 \82¢\82¤)?", "\82 \82¢\82¤", 0, 6, 1);
+  x3("(\82 \82¢\82¤)*", "\82 \82¢\82¤", 0, 6, 1);
+  x3("(\82 \82¢\82¤)+", "\82 \82¢\82¤", 0, 6, 1);
+  x3("(\82³\82µ\82·|\82 \82¢\82¤)+", "\82 \82¢\82¤", 0, 6, 1);
+  x3("([\82È\82É\82Ê][\82©\82«\82­]|\82©\82«\82­)+", "\82©\82«\82­", 0, 6, 1);
+  x3("((?i:\82 \82¢\82¤))", "\82 \82¢\82¤", 0, 6, 1);
+  x3("((?m:\82 .\82¤))", "\82 \n\82¤", 0, 5, 1);
+  x3("((?=\82 \82ñ)\82 )", "\82 \82ñ\82¢", 0, 2, 1);
+  x3("\82 \82¢\82¤|(.\82 \82¢\82¦)", "\82ñ\82 \82¢\82¦", 0, 8, 1);
+  x3("\82 *(.)", "\82 \82 \82 \82 \82ñ", 8, 10, 1);
+  x3("\82 *?(.)", "\82 \82 \82 \82 \82ñ", 0, 2, 1);
+  x3("\82 *?(\82ñ)", "\82 \82 \82 \82 \82ñ", 8, 10, 1);
+  x3("[\82¢\82¤\82¦]\82 *(.)", "\82¦\82 \82 \82 \82 \82ñ", 10, 12, 1);
+  x3("(\\A\82¢\82¢)\82¤\82¤", "\82¢\82¢\82¤\82¤", 0, 4, 1);
+  n("(\\A\82¢\82¢)\82¤\82¤", "\82ñ\82¢\82¢\82¤\82¤");
+  x3("(^\82¢\82¢)\82¤\82¤", "\82¢\82¢\82¤\82¤", 0, 4, 1);
+  n("(^\82¢\82¢)\82¤\82¤", "\82ñ\82¢\82¢\82¤\82¤");
+  x3("\82ë\82ë(\82é\82é$)", "\82ë\82ë\82é\82é", 4, 8, 1);
+  n("\82ë\82ë(\82é\82é$)", "\82ë\82ë\82é\82é\82é");
+  x2("(\96³)\\1", "\96³\96³", 0, 4);
+  n("(\96³)\\1", "\96³\95\90");
+  x2("(\8bó?)\\1", "\8bó\8bó", 0, 4);
+  x2("(\8bó?\?)\\1", "\8bó\8bó", 0, 0);
+  x2("(\8bó*)\\1", "\8bó\8bó\8bó\8bó\8bó", 0, 8);
+  x3("(\8bó*)\\1", "\8bó\8bó\8bó\8bó\8bó", 0, 4, 1);
+  x2("\82 (\82¢*)\\1", "\82 \82¢\82¢\82¢\82¢", 0, 10);
+  x2("\82 (\82¢*)\\1", "\82 \82¢", 0, 2);
+  x2("(\82 *)(\82¢*)\\1\\2", "\82 \82 \82 \82¢\82¢\82 \82 \82 \82¢\82¢", 0, 20);
+  x2("(\82 *)(\82¢*)\\2", "\82 \82 \82 \82¢\82¢\82¢\82¢", 0, 14);
+  x3("(\82 *)(\82¢*)\\2", "\82 \82 \82 \82¢\82¢\82¢\82¢", 6, 10, 2);
+  x2("(((((((\82Û*)\82Ø))))))\82Ò\\7", "\82Û\82Û\82Û\82Ø\82Ò\82Û\82Û\82Û", 0, 16);
+  x3("(((((((\82Û*)\82Ø))))))\82Ò\\7", "\82Û\82Û\82Û\82Ø\82Ò\82Û\82Û\82Û", 0, 6, 7);
+  x2("(\82Í)(\82Ð)(\82Ó)\\2\\1\\3", "\82Í\82Ð\82Ó\82Ð\82Í\82Ó", 0, 12);
+  x2("([\82«-\82¯])\\1", "\82­\82­", 0, 4);
+  x2("(\\w\\d\\s)\\1", "\82 5 \82 5 ", 0, 8);
+  n("(\\w\\d\\s)\\1", "\82 5 \82 5");
+  x2("(\92N\81H|[\82 -\82¤]{3})\\1", "\92N\81H\92N\81H", 0, 8);
+  x2("...(\92N\81H|[\82 -\82¤]{3})\\1", "\82 a\82 \92N\81H\92N\81H", 0, 13);
+  x2("(\92N\81H|[\82 -\82¤]{3})\\1", "\82¤\82¢\82¤\82¤\82¢\82¤", 0, 12);
+  x2("(^\82±)\\1", "\82±\82±", 0, 4);
+  n("(^\82Þ)\\1", "\82ß\82Þ\82Þ");
+  n("(\82 $)\\1", "\82 \82 ");
+  n("(\82 \82¢\\Z)\\1", "\82 \82¢");
+  x2("(\82 *\\Z)\\1", "\82 ", 2, 2);
+  x2(".(\82 *\\Z)\\1", "\82¢\82 ", 2, 4);
+  x3("(.(\82â\82¢\82ä)\\2)", "z\82â\82¢\82ä\82â\82¢\82ä", 0, 13, 1);
+  x3("(.(..\\d.)\\2)", "\82 12341234", 0, 10, 1);
+  x2("((?i:\82 v\82¸))\\1", "\82 v\82¸\82 v\82¸", 0, 10);
+  x2("(?<\8bð\82©>\95Ï|\\(\\g<\8bð\82©>\\))", "((((((\95Ï))))))", 0, 14);
+  x2("\\A(?:\\g<\88¢_1>|\\g<\89]_2>|\\z\8fI\97¹  (?<\88¢_1>\8aÏ|\8e©\\g<\89]_2>\8e©)(?<\89]_2>\8dÝ|\95ì\8eF\\g<\88¢_1>\95ì\8eF))$", "\95ì\8eF\8e©\95ì\8eF\8e©\8dÝ\8e©\95ì\8eF\8e©\95ì\8eF", 0, 26);
+  x2("[[\82Ð\82Ó]]", "\82Ó", 0, 2);
+  x2("[[\82¢\82¨\82¤]\82©]", "\82©", 0, 2);
+  n("[[^\82 ]]", "\82 ");
+  n("[^[\82 ]]", "\82 ");
+  x2("[^[^\82 ]]", "\82 ", 0, 2);
+  x2("[[\82©\82«\82­]&&\82«\82­]", "\82­", 0, 2);
+  n("[[\82©\82«\82­]&&\82«\82­]", "\82©");
+  n("[[\82©\82«\82­]&&\82«\82­]", "\82¯");
+  x2("[\82 -\82ñ&&\82¢-\82ð&&\82¤-\82ï]", "\82ï", 0, 2);
+  n("[^\82 -\82ñ&&\82¢-\82ð&&\82¤-\82ï]", "\82ï");
+  x2("[[^\82 &&\82 ]&&\82 -\82ñ]", "\82¢", 0, 2);
+  n("[[^\82 &&\82 ]&&\82 -\82ñ]", "\82 ");
+  x2("[[^\82 -\82ñ&&\82¢\82¤\82¦\82¨]&&[^\82¤-\82©]]", "\82«", 0, 2);
+  n("[[^\82 -\82ñ&&\82¢\82¤\82¦\82¨]&&[^\82¤-\82©]]", "\82¢");
+  x2("[^[^\82 \82¢\82¤]&&[^\82¤\82¦\82¨]]", "\82¤", 0, 2);
+  x2("[^[^\82 \82¢\82¤]&&[^\82¤\82¦\82¨]]", "\82¦", 0, 2);
+  n("[^[^\82 \82¢\82¤]&&[^\82¤\82¦\82¨]]", "\82©");
+  x2("[\82 -&&-\82 ]", "-", 0, 1);
+  x2("[^[^a-z\82 \82¢\82¤]&&[^bcdefg\82¤\82¦\82¨]q-w]", "\82¦", 0, 2);
+  x2("[^[^a-z\82 \82¢\82¤]&&[^bcdefg\82¤\82¦\82¨]g-w]", "f", 0, 1);
+  x2("[^[^a-z\82 \82¢\82¤]&&[^bcdefg\82¤\82¦\82¨]g-w]", "g", 0, 1);
+  n("[^[^a-z\82 \82¢\82¤]&&[^bcdefg\82¤\82¦\82¨]g-w]", "2");
+  x2("a<b>\83o\81[\83W\83\87\83\93\82Ì\83_\83E\83\93\83\8d\81[\83h<\\/b>", "a<b>\83o\81[\83W\83\87\83\93\82Ì\83_\83E\83\93\83\8d\81[\83h</b>", 0, 32);
+  x2(".<b>\83o\81[\83W\83\87\83\93\82Ì\83_\83E\83\93\83\8d\81[\83h<\\/b>", "a<b>\83o\81[\83W\83\87\83\93\82Ì\83_\83E\83\93\83\8d\81[\83h</b>", 0, 32);
+  fprintf(stdout,
+       "\nRESULT   SUCC: %d,  FAIL: %d,  ERROR: %d      (by Oniguruma %s)\n",
+       nsucc, nfail, nerror, onig_version());
+
+#ifndef POSIX_TEST
+  onig_region_free(region, 1);
+  onig_end();
+#endif
+
+  return ((nfail == 0 && nerror == 0) ? 0 : -1);
+}
index dd6162c2515746dd0b40bdef112f8ec135554abf..2f0776bc782dd160aba89f61cee82f34466dedcc 100644 (file)
@@ -29,9 +29,9 @@ function do_translit($st) {
     return $st;
 } 
 
-echo do_translit("ФÑ\83Ñ\86к");
+echo do_translit("Ð\9fеаÑ\80");
 ?>
 --EXPECT--
-Fuck
+pear
 --CREDITS--
-Testfest Wuerzburg 2009-06-20
+Testfest Wuerzburg 2009-06-20 (modified by rui 2011-10-15)
\ No newline at end of file