]> granicus.if.org Git - php/commitdiff
MFB: Thanks to the "maintainers" who are too lazy to commit FIRST to HEAD!
authorJani Taskinen <jani@php.net>
Mon, 20 Apr 2009 17:06:03 +0000 (17:06 +0000)
committerJani Taskinen <jani@php.net>
Mon, 20 Apr 2009 17:06:03 +0000 (17:06 +0000)
124 files changed:
ext/mbstring/config.m4
ext/mbstring/config.w32
ext/mbstring/libmbfl/AUTHORS
ext/mbstring/libmbfl/Makefile.am
ext/mbstring/libmbfl/configure.in
ext/mbstring/libmbfl/filters/Makefile.am
ext/mbstring/libmbfl/filters/Makefile.bcc32
ext/mbstring/libmbfl/filters/mbfilter_cp1254.c [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_cp1254.h [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_cp850.c [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_cp850.h [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_jis.c
ext/mbstring/libmbfl/filters/mbfilter_jis.h
ext/mbstring/libmbfl/filters/mbfilter_koi8u.c [new file with mode: 0644]
ext/mbstring/libmbfl/filters/mbfilter_koi8u.h [new file with mode: 0644]
ext/mbstring/libmbfl/filters/unicode_table_cp1254.h [new file with mode: 0644]
ext/mbstring/libmbfl/filters/unicode_table_cp850.h [new file with mode: 0644]
ext/mbstring/libmbfl/filters/unicode_table_jis.h
ext/mbstring/libmbfl/filters/unicode_table_koi8u.h [new file with mode: 0644]
ext/mbstring/libmbfl/libmbfl.dsp
ext/mbstring/libmbfl/libmbfl.sln
ext/mbstring/libmbfl/libmbfl.vcproj
ext/mbstring/libmbfl/mbfl.rc
ext/mbstring/libmbfl/mbfl/Makefile.am
ext/mbstring/libmbfl/mbfl/Makefile.bcc32
ext/mbstring/libmbfl/mbfl/mbfilter.h
ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h
ext/mbstring/libmbfl/mbfl/mbfilter_pass.h
ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h
ext/mbstring/libmbfl/mbfl/mbfl_convert.c
ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
ext/mbstring/libmbfl/mbfl/mbfl_ident.c
ext/mbstring/libmbfl/mbfl/mbfl_language.c
ext/mbstring/libmbfl/mbfl/mbfl_language.h
ext/mbstring/libmbfl/nls/Makefile.am
ext/mbstring/libmbfl/nls/Makefile.bcc32
ext/mbstring/libmbfl/nls/nls_ua.c [new file with mode: 0644]
ext/mbstring/libmbfl/nls/nls_ua.h [new file with mode: 0644]
ext/mbstring/libmbfl/tests/Makefile.am [new file with mode: 0644]
ext/mbstring/libmbfl/tests/conv_encoding.c [new file with mode: 0644]
ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am [new file with mode: 0644]
ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/conv_kana.c [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strcut.c [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp [new file with mode: 0644]
ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp [new file with mode: 0644]
ext/mbstring/mbstring.c
ext/mbstring/mbstring.h
ext/mbstring/oniguruma/COPYING
ext/mbstring/oniguruma/HISTORY
ext/mbstring/oniguruma/README
ext/mbstring/oniguruma/README.ja
ext/mbstring/oniguruma/config.h.in
ext/mbstring/oniguruma/enc/big5.c
ext/mbstring/oniguruma/enc/euc_jp.c
ext/mbstring/oniguruma/enc/euc_kr.c
ext/mbstring/oniguruma/enc/euc_tw.c
ext/mbstring/oniguruma/enc/iso8859_1.c
ext/mbstring/oniguruma/enc/iso8859_10.c
ext/mbstring/oniguruma/enc/iso8859_11.c
ext/mbstring/oniguruma/enc/iso8859_13.c
ext/mbstring/oniguruma/enc/iso8859_14.c
ext/mbstring/oniguruma/enc/iso8859_15.c
ext/mbstring/oniguruma/enc/iso8859_16.c
ext/mbstring/oniguruma/enc/iso8859_2.c
ext/mbstring/oniguruma/enc/iso8859_3.c
ext/mbstring/oniguruma/enc/iso8859_4.c
ext/mbstring/oniguruma/enc/iso8859_5.c
ext/mbstring/oniguruma/enc/iso8859_6.c
ext/mbstring/oniguruma/enc/iso8859_7.c
ext/mbstring/oniguruma/enc/iso8859_8.c
ext/mbstring/oniguruma/enc/iso8859_9.c
ext/mbstring/oniguruma/enc/koi8.c
ext/mbstring/oniguruma/enc/koi8_r.c
ext/mbstring/oniguruma/enc/mktable.c
ext/mbstring/oniguruma/enc/sjis.c
ext/mbstring/oniguruma/enc/unicode.c
ext/mbstring/oniguruma/enc/utf16_be.c
ext/mbstring/oniguruma/enc/utf16_le.c
ext/mbstring/oniguruma/enc/utf32_be.c
ext/mbstring/oniguruma/enc/utf32_le.c
ext/mbstring/oniguruma/enc/utf8.c
ext/mbstring/oniguruma/index.html
ext/mbstring/oniguruma/onigcmpt200.h
ext/mbstring/oniguruma/oniggnu.h
ext/mbstring/oniguruma/oniguruma.h
ext/mbstring/oniguruma/regcomp.c
ext/mbstring/oniguruma/regenc.c
ext/mbstring/oniguruma/regenc.h
ext/mbstring/oniguruma/regerror.c
ext/mbstring/oniguruma/regexec.c
ext/mbstring/oniguruma/regext.c
ext/mbstring/oniguruma/reggnu.c
ext/mbstring/oniguruma/regint.h
ext/mbstring/oniguruma/regparse.c
ext/mbstring/oniguruma/regparse.h
ext/mbstring/oniguruma/regposix.c
ext/mbstring/oniguruma/regsyntax.c
ext/mbstring/oniguruma/regversion.c
ext/mbstring/oniguruma/st.c
ext/mbstring/oniguruma/st.h
ext/mbstring/php_mbregex.c
ext/mbstring/php_unicode.c
ext/mbstring/tests/bug43994.phpt
ext/mbstring/tests/bug43998.phpt
ext/mbstring/tests/mb_strstr.phpt [new file with mode: 0644]

index e61ba3c1b2a714a97284bd273129ef0a204d912a..a5e7920cdf8e1807c310e92f3e6cddf837eb1e66 100644 (file)
@@ -182,9 +182,9 @@ int main() { return foo(10, "", 3.14); }
       PHP_EVAL_LIBLINE([$MBSTRING_SHARED_LIBADD], LDFLAGS)
       AC_MSG_CHECKING([if oniguruma has an invalid entry for KOI8 encoding])
       AC_TRY_LINK([
-  #include <oniguruma.h>
+#include <oniguruma.h>
       ], [
-  return (int)(ONIG_ENCODING_KOI8 + 1);
+return (int)(ONIG_ENCODING_KOI8 + 1);
       ], [
         AC_MSG_RESULT([no])
       ], [
@@ -228,6 +228,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
      libmbfl/filters/mbfilter_byte4.c
      libmbfl/filters/mbfilter_cp1251.c
      libmbfl/filters/mbfilter_cp1252.c
+     libmbfl/filters/mbfilter_cp1254.c
      libmbfl/filters/mbfilter_cp866.c
      libmbfl/filters/mbfilter_cp932.c
      libmbfl/filters/mbfilter_cp936.c
@@ -255,6 +256,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
      libmbfl/filters/mbfilter_iso8859_8.c
      libmbfl/filters/mbfilter_iso8859_9.c
      libmbfl/filters/mbfilter_jis.c
+     libmbfl/filters/mbfilter_iso2022_jp_ms.c
      libmbfl/filters/mbfilter_koi8r.c
      libmbfl/filters/mbfilter_armscii8.c
      libmbfl/filters/mbfilter_qprint.c
@@ -268,6 +270,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
      libmbfl/filters/mbfilter_utf7imap.c
      libmbfl/filters/mbfilter_utf8.c
      libmbfl/filters/mbfilter_uuencode.c
+     libmbfl/filters/mbfilter_koi8u.c
      libmbfl/filters/mbfilter_cp850.c
      libmbfl/mbfl/mbfilter.c
      libmbfl/mbfl/mbfilter_8bit.c
@@ -291,9 +294,9 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
      libmbfl/nls/nls_zh.c
      libmbfl/nls/nls_hy.c
      libmbfl/nls/nls_tr.c
+     libmbfl/nls/nls_ua.c
     ])
     PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H])
-
     PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_allocators.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h])
   else
     dnl
@@ -337,8 +340,8 @@ PHP_ARG_ENABLE([mbregex_backtrack], [whether to check multibyte regex backtrack]
                             MBSTRING: Disable multibyte regex backtrack check], yes, no)
 
 PHP_ARG_WITH(libmbfl, [for external libmbfl],
-[  --with-libmbfl[=DIR]      MBSTRING: Use external libmbfl. DIR is the libmbfl install prefix.
-                            If DIR is not set, the bundled libmbfl will be used], no, no)
+[  --with-libmbfl[=DIR]      MBSTRING: Use external libmbfl.  DIR is the libmbfl base
+                            install directory [BUNDLED]], no, no)
 
 PHP_ARG_WITH(onig, [for external oniguruma],
 [  --with-onig[=DIR]         MBSTRING: Use external oniguruma. DIR is the oniguruma install prefix.
index 87f68dbd3b0eef33761e886918638d9f0226f7a5..f452c1868aceb9bd247753a2a300405faa5533a1 100644 (file)
@@ -3,6 +3,7 @@
 
 ARG_ENABLE("mbstring", "multibyte string functions", "no");
 ARG_ENABLE("mbregex", "multibyte regex support", "no");
+ARG_ENABLE("mbregex-backtrack", "check multibyte regex backtrack", "yes");
 
 if (PHP_MBSTRING == "yes") {
 
@@ -11,7 +12,7 @@ if (PHP_MBSTRING == "yes") {
        FSO.CopyFile("ext\\mbstring\\oniguruma\\win32\\config.h",
                "ext\\mbstring\\oniguruma\\config.h", true);
        
-       EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", null,
+       EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", true,
                "-Iext/mbstring/libmbfl -Iext/mbstring/libmbfl/mbfl \
                -Iext/mbstring/oniguruma /D NOT_RUBY=1 /D LIBMBFL_EXPORTS=1 \
                /D HAVE_STDARG_PROTOTYPES=1 /D HAVE_CONFIG_H /D HAVE_STDLIB_H \
@@ -28,10 +29,12 @@ if (PHP_MBSTRING == "yes") {
                mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \
                mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \
                mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \
+               mbfilter_iso2022_jp_ms.c \
                mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
                mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
                mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
-               mbfilter_uuencode.c mbfilter_armscii8.c", "mbstring");
+               mbfilter_koi8u.c mbfilter_cp1254.c \
+               mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c ", "mbstring");
 
        ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
                mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
@@ -40,7 +43,7 @@ if (PHP_MBSTRING == "yes") {
 
        ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \
                nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \
-               nls_tr.c", "mbstring");
+               nls_ua.c nls_tr.c", "mbstring");
 
        AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support');
        AC_DEFINE('HAVE_MBSTR_CN', 1, 'CN');
@@ -53,6 +56,9 @@ if (PHP_MBSTRING == "yes") {
                AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h');
                AC_DEFINE('HAVE_MBREGEX', 1);
                AC_DEFINE('HAVE_ONIG', 1);
+               if (PHP_MBREGEX_BACKTRACK != "no") {
+                       AC_DEFINE('USE_COMBINATION_EXPLOSION_CHECK', 1);
+               }
                ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \
                        regenc.c regexec.c reggnu.c regparse.c regposerr.c \
                        regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring");
@@ -62,7 +68,7 @@ if (PHP_MBSTRING == "yes") {
                        iso8859_7.c iso8859_8.c iso8859_9.c iso8859_10.c \
                        iso8859_11.c iso8859_13.c iso8859_14.c iso8859_15.c iso8859_16.c \
                        koi8.c koi8_r.c sjis.c utf8.c unicode.c utf16_be.c utf16_le.c \
-                       utf32_be.c utf32_le.c", "mbstring");
+                       utf32_be.c utf32_le.c gb18030.c", "mbstring");
                ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring");
        }
 }
index e6062315182a91970d888bdec8cff7fc5a14ecb2..9a9f2f9fa379d2bdb8ee62728ca53e3a9dea3f32 100644 (file)
@@ -1,10 +1,13 @@
-Den V. Tsopa <tdv@edisoft.ru>
-Hironori Sato <satoh@jpnnet.com>
 Marcus Boerger <helly@php.net>
-Moriyoshi Koizumi <moriyoshi@php.net>
+Hayk Chamyan <hamshen@gmail.com>
+Wez Furlong <wez@thebrainroom.com>
 Rui Hirokawa <hirokawa@php.net>
 Shigeru Kanemoto <sgk@happysize.co.jp>
-Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
-Tateyama  <tateyan@amy.hi-ho.ne.jp>
 U. Kenkichi <kenkichi@axes.co.jp>
-Wez Furlong <wez@thebrainroom.com>
+Moriyoshi Koizumi <moriyoshi@php.net>
+Hironori Sato <satoh@jpnnet.com>
+Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
+Tateyama <tateyan@amy.hi-ho.ne.jp>
+Den V. Tsopa <tdv@edisoft.ru>
+Maksym Veremeyenko <verem@m1stereo.tv>
+Haluk AKIN <halukakin@gmail.com>
index 7f60683435c741b4d4a1a81554c033f8d094b511..070a7fcc622eb9798d409fcf9fedae24e3754bc9 100644 (file)
@@ -1,5 +1,9 @@
+AUTOMAKE_OPTIONS=dejagnu
+DEJATOOL=conv_encoding conv_kana strwidth strcut
+RUNTESTDEFAULTFLAGS=--tool $$tool --srcdir "$$srcdir"/tests
+LANG=C
 EXTRA_DIST=AUTHORS DISCLAIMER LICENSE Makefile.bcc32 \
        config.h.bcc32 config.h.vc6 \
        libmbfl.dsp libmbfl.dsw libmbfl.sln libmbfl.vcproj mbfl.rc \
        mksbcc32.bat rules.mak.bcc32
-SUBDIRS = nls filters mbfl
+SUBDIRS = nls filters mbfl tests
index 25d2e59373c998062f56c90698cbbaaf71724283..47e1026c7d3e24e4c3fb7da38fdf489485707914 100644 (file)
@@ -1,10 +1,10 @@
 # Process this file with autoconf to produce a configure script.
 AC_INIT(mbfl/mbfilter.c)
-AM_INIT_AUTOMAKE(libmbfl, 1.0.0)
+AM_INIT_AUTOMAKE(libmbfl, 1.0.2)
 AC_CONFIG_SRCDIR(mbfl/mbfilter.c)
 AM_CONFIG_HEADER(config.h)
 
-SHLIB_VERSION="1:0:0"
+SHLIB_VERSION="1:0:2"
 AC_SUBST(SHLIB_VERSION)
 
 # Checks for programs.
@@ -34,5 +34,14 @@ if test "$FETCH_VIA_FTP" = "curl"; then
   FETCH_VIA_FTP="curl -O"
 fi
 
-AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile])
+AC_CONFIG_FILES([
+  Makefile
+  mbfl/Makefile
+  filters/Makefile
+  nls/Makefile
+  tests/Makefile
+  tests/conv_encoding.tests/Makefile
+  tests/conv_kana.tests/Makefile
+  tests/strwidth.tests/Makefile
+  tests/strcut.tests/Makefile])
 AC_OUTPUT
index 9b2fda4c39532a4856126a7dec6a65216fea96f1..802af4e61db735ab4d31d333c730c280e5d00b5a 100644 (file)
@@ -2,7 +2,139 @@ EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk
 noinst_LTLIBRARIES=libmbfl_filters.la
 INCLUDES=-I../mbfl
 libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_cp51932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp51932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h 
unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h
+libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
+       mbfilter_hz.c \
+       mbfilter_euc_tw.c \
+       mbfilter_big5.c \
+       mbfilter_euc_jp.c \
+       mbfilter_jis.c \
+       mbfilter_iso8859_1.c \
+       mbfilter_iso8859_2.c \
+       mbfilter_cp1254.c \
+       mbfilter_cp1252.c \
+       mbfilter_cp1251.c \
+       mbfilter_ascii.c \
+       mbfilter_iso8859_3.c \
+       mbfilter_iso8859_4.c \
+       mbfilter_iso8859_5.c \
+       mbfilter_iso8859_6.c \
+       mbfilter_iso8859_7.c \
+       mbfilter_iso8859_8.c \
+       mbfilter_iso8859_9.c \
+       mbfilter_iso8859_10.c \
+       mbfilter_iso8859_13.c \
+       mbfilter_iso8859_14.c \
+       mbfilter_iso8859_15.c \
+       mbfilter_iso8859_16.c \
+       mbfilter_htmlent.c \
+       mbfilter_byte2.c \
+       mbfilter_byte4.c \
+       mbfilter_uuencode.c \
+       mbfilter_base64.c \
+       mbfilter_sjis.c \
+       mbfilter_7bit.c \
+       mbfilter_qprint.c \
+       mbfilter_ucs4.c \
+       mbfilter_ucs2.c \
+       mbfilter_utf32.c \
+       mbfilter_utf16.c \
+       mbfilter_utf8.c \
+       mbfilter_utf7.c \
+       mbfilter_utf7imap.c \
+       mbfilter_euc_jp_win.c \
+       mbfilter_cp932.c \
+       mbfilter_cp51932.c \
+       mbfilter_euc_cn.c \
+       mbfilter_euc_kr.c \
+       mbfilter_uhc.c \
+       mbfilter_iso2022_kr.c \
+       mbfilter_cp866.c \
+       mbfilter_koi8r.c \
+       mbfilter_koi8u.c \
+       mbfilter_armscii8.c \
+       mbfilter_cp850.c \
+       html_entities.c \
+       cp932_table.h \
+       html_entities.h \
+       mbfilter_7bit.h \
+       mbfilter_ascii.h \
+       mbfilter_base64.h \
+       mbfilter_big5.h \
+       mbfilter_byte2.h \
+       mbfilter_byte4.h \
+       mbfilter_cp1251.h \
+       mbfilter_cp1252.h \
+       mbfilter_cp1254.h \
+       mbfilter_cp866.h \
+       mbfilter_cp932.h \
+       mbfilter_cp936.h \
+       mbfilter_euc_cn.h \
+       mbfilter_euc_jp.h \
+       mbfilter_euc_jp_win.h \
+       mbfilter_euc_kr.h \
+       mbfilter_euc_tw.h \
+       mbfilter_htmlent.h \
+       mbfilter_hz.h \
+       mbfilter_iso2022_kr.h \
+       mbfilter_iso8859_1.h \
+       mbfilter_iso8859_10.h \
+       mbfilter_iso8859_13.h \
+       mbfilter_iso8859_14.h \
+       mbfilter_iso8859_15.h \
+       mbfilter_iso8859_16.h \
+       mbfilter_iso8859_2.h \
+       mbfilter_iso8859_3.h \
+       mbfilter_iso8859_4.h \
+       mbfilter_iso8859_5.h \
+       mbfilter_iso8859_6.h \
+       mbfilter_iso8859_7.h \
+       mbfilter_iso8859_8.h \
+       mbfilter_iso8859_9.h \
+       mbfilter_jis.h \
+       mbfilter_koi8r.h \
+       mbfilter_koi8u.h \
+       mbfilter_armscii8.h \
+       mbfilter_qprint.h \
+       mbfilter_sjis.h \
+       mbfilter_ucs2.h \
+       mbfilter_ucs4.h \
+       mbfilter_uhc.h \
+       mbfilter_utf16.h \
+       mbfilter_utf32.h \
+       mbfilter_utf7.h \
+       mbfilter_utf7imap.h \
+       mbfilter_utf8.h \
+       mbfilter_uuencode.h \
+       mbfilter_cp51932.h \
+       mbfilter_cp850.h \
+       unicode_prop.h \
+       unicode_table_big5.h \
+       unicode_table_cns11643.h \
+       unicode_table_cp1251.h \
+       unicode_table_cp1252.h \
+       unicode_table_cp1254.h \
+       unicode_table_cp866.h \
+       unicode_table_cp932_ext.h \
+       unicode_table_cp936.h \
+       unicode_table_iso8859_10.h \
+       unicode_table_iso8859_13.h \
+       unicode_table_iso8859_14.h \
+       unicode_table_iso8859_15.h \
+       unicode_table_iso8859_16.h \
+       unicode_table_iso8859_2.h \
+       unicode_table_iso8859_3.h \
+       unicode_table_iso8859_4.h \
+       unicode_table_iso8859_5.h \
+       unicode_table_iso8859_6.h \
+       unicode_table_iso8859_7.h \
+       unicode_table_iso8859_8.h \
+       unicode_table_iso8859_9.h \
+       unicode_table_jis.h \
+       unicode_table_koi8r.h \
+       unicode_table_koi8u.h \
+       unicode_table_armscii8.h \
+       unicode_table_cp850.h \
+       unicode_table_uhc.h
 
 mbfilter_iso8859_2.c: unicode_table_iso8859_2.h
 
index 03e1d8802456073b78f0536340125824ad4be0e5..841c09632a4888587b083e3d1656298acacffdeb 100644 (file)
@@ -1,6 +1,56 @@
 !include ..\rules.mak.bcc32
 INCLUDES=$(INCLUDES) -I../mbfl
-OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj mbfilter_armscii8.obj
+OBJS=mbfilter_cp936.obj \
+       mbfilter_hz.obj \
+       mbfilter_euc_tw.obj \
+       mbfilter_big5.obj \
+       mbfilter_euc_jp.obj \
+       mbfilter_jis.obj \
+       mbfilter_iso8859_1.obj \
+       mbfilter_iso8859_2.obj \
+       mbfilter_cp1252.obj \
+       mbfilter_cp1251.obj \
+       mbfilter_cp1254.obj \
+       mbfilter_ascii.obj \
+       mbfilter_iso8859_3.obj \
+       mbfilter_iso8859_4.obj \
+       mbfilter_iso8859_5.obj \
+       mbfilter_iso8859_6.obj \
+       mbfilter_iso8859_7.obj \
+       mbfilter_iso8859_8.obj \
+       mbfilter_iso8859_9.obj \
+       mbfilter_iso8859_10.obj \
+       mbfilter_iso8859_13.obj \
+       mbfilter_iso8859_14.obj \
+       mbfilter_iso8859_15.obj \
+       mbfilter_iso8859_16.obj \
+       mbfilter_htmlent.obj \
+       mbfilter_byte2.obj \
+       mbfilter_byte4.obj \
+       mbfilter_uuencode.obj \
+       mbfilter_base64.obj \
+       mbfilter_sjis.obj \
+       mbfilter_7bit.obj \
+       mbfilter_qprint.obj \
+       mbfilter_ucs4.obj \
+       mbfilter_ucs2.obj \
+       mbfilter_utf32.obj \
+       mbfilter_utf16.obj \
+       mbfilter_utf8.obj \
+       mbfilter_utf7.obj \
+       mbfilter_utf7imap.obj \
+       mbfilter_euc_jp_win.obj \
+       mbfilter_cp932.obj \
+       mbfilter_euc_cn.obj \
+       mbfilter_euc_kr.obj \
+       mbfilter_uhc.obj \
+       mbfilter_iso2022_kr.obj \
+       mbfilter_cp866.obj \
+       mbfilter_koi8r.obj \
+       mbfilter_koi8u.obj \
+       html_entities.obj \
+       mbfilter_armscii8.obj \
+       mbfilter_cp850.obj
 
 all: $(OBJS)
 
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c
new file mode 100644 (file)
index 0000000..7e93342
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Haluk AKIN <halukakin@gmail.com>
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter_ru.c
+ * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
+ * 
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_cp1254.h"
+#include "unicode_table_cp1254.h"
+
+static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter);
+
+static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
+
+const mbfl_encoding mbfl_encoding_cp1254 = {
+       mbfl_no_encoding_cp1254,
+       "Windows-1254",
+       "Windows-1254",
+       (const char *(*)[])&mbfl_encoding_cp1254_aliases,
+       NULL,
+       MBFL_ENCTYPE_SBCS
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_cp1254 = {
+       mbfl_no_encoding_cp1254,
+       mbfl_filt_ident_common_ctor,
+       mbfl_filt_ident_common_dtor,
+       mbfl_filt_ident_cp1254
+};
+
+const struct mbfl_convert_vtbl vtbl_cp1254_wchar = {
+       mbfl_no_encoding_cp1254,
+       mbfl_no_encoding_wchar,
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_cp1254_wchar,
+       mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = {
+       mbfl_no_encoding_wchar,
+       mbfl_no_encoding_cp1254,
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_wchar_cp1254,
+       mbfl_filt_conv_common_flush
+};
+
+#define CK(statement)  do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * wchar => cp1254: emit the cp1254 code for wchar value c through filter->output_function.
+ */
+int
+mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter)
+{
+       int s, n; /* s: code to emit (negative = none found), n: table index */
+
+       if (c < 0x80) { /* values below 0x80 are passed through as-is (s = c) */
+               s = c;
+       } else {
+               s = -1; /* no mapping found yet */
+               n = cp1254_ucs_table_len-1;
+               while (n >= 0) { /* linear scan of the table, last entry first */
+                       if (c == cp1254_ucs_table[n] && c != 0xfffe) { /* NOTE(review): 0xfffe presumably marks unassigned slots - confirm in unicode_table_cp1254.h */
+                               s = cp1254_ucs_table_min + n; /* table index -> cp1254 code */
+                               break;
+                       }
+                       n--;
+               }
+               if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) { /* not in table, but c carries the CP1254 plane tag */
+                       s = c & MBFL_WCSPLANE_MASK; /* strip the tag, keep the low bits */
+               }
+       }
+
+       if (s >= 0) {
+               CK((*filter->output_function)(s, filter->data));
+       } else {
+               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { /* unmappable: nothing is emitted when the mode is NONE */
+                       CK(mbfl_filt_conv_illegal_output(c, filter));
+               }
+       }
+
+       return c; /* input is returned; CK() has already returned -1 if a callee reported < 0 */
+}
+
+/*
+ * cp1254 => wchar: translate input value c via cp1254_ucs_table and emit the result.
+ */
+int
+mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter)
+{
+       int s; /* value handed to the output function */
+
+       if (c >= 0 && c < cp1254_ucs_table_min) { /* below the table's first code: emitted unchanged */
+               s = c;
+       } else if (c >= cp1254_ucs_table_min && c < 0x100) { /* covered by the table */
+               s = cp1254_ucs_table[c - cp1254_ucs_table_min];
+               if (s <= 0) { /* no usable table entry: emit c tagged with the CP1254 plane instead */
+                       s = c;
+                       s &= MBFL_WCSPLANE_MASK;
+                       s |= MBFL_WCSPLANE_CP1254;
+               }
+       } else { /* negative or >= 0x100: emit c tagged with the THROUGH group */
+               s = c;
+               s &= MBFL_WCSGROUP_MASK;
+               s |= MBFL_WCSGROUP_THROUGH;
+       }
+
+       CK((*filter->output_function)(s, filter->data)); /* CK() returns -1 from here if the call reports < 0 */
+
+       return c;
+}
+
+/* We only treat the bytes 0x80-0xfe as cp1254 (which extends ISO-8859-9, not ISO-8859-1).
+ * Actually, this is pretty much a NO-OP, since the identification
+ * system doesn't allow us to discriminate between a positive match,
+ * a possible match and a definite non-match.
+ * The problem here is that cp1254 looks like SJIS for certain chars.
+ * */
+static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter)
+{
+       if (c >= 0x80 && c < 0xff) /* 0x80..0xfe: flag cleared */
+               filter->flag = 0;
+       else
+               filter->flag = 1; /* not it - NOTE(review): this branch also covers c < 0x80 and 0xff; confirm intended */
+       return c;       
+}
+
+
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h
new file mode 100644 (file)
index 0000000..3200d52
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Haluk AKIN <halukakin@gmail.com>
+ *
+ */
+/*
+ * the source code included in this file was separated from mbfilter.c
+ * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
+ *
+ */
+
+#ifndef MBFL_MBFILTER_CP1254_H
+#define MBFL_MBFILTER_CP1254_H
+
+#include "mbfilter.h"
+
+extern const mbfl_encoding mbfl_encoding_cp1254;
+extern const struct mbfl_identify_vtbl vtbl_identify_cp1254;
+extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254;
+
+int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_CP1254_H */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.c b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c
new file mode 100644 (file)
index 0000000..5388c04
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Den V. Tsopa <tdv@edisoft.ru>
+ * Adaption for CP850: D. Giffeler <dg@artegic.de>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_cp850.h"
+#include "unicode_table_cp850.h"
+
+static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter); /* forward declaration: vtbl_identify_cp850 refers to it before its definition */
+
+static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM-850", NULL}; /* NULL-terminated list of alternative names */
+
+const mbfl_encoding mbfl_encoding_cp850 = {
+       mbfl_no_encoding_cp850, /* encoding id */
+       "CP850", /* presumably the canonical name -- confirm against mbfl_encoding */
+       "CP850", /* presumably the MIME name -- confirm against mbfl_encoding */
+       (const char *(*)[])&mbfl_encoding_cp850_aliases,
+       NULL, /* NOTE(review): the meaning of this slot is not visible in this file */
+       MBFL_ENCTYPE_SBCS
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_cp850 = {
+       mbfl_no_encoding_cp850,
+       mbfl_filt_ident_common_ctor,
+       mbfl_filt_ident_common_dtor,
+       mbfl_filt_ident_cp850 /* per-byte identify callback defined at the end of this file */
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_cp850 = {
+       mbfl_no_encoding_wchar, /* presumably the source encoding -- confirm against mbfl_convert_vtbl */
+       mbfl_no_encoding_cp850, /* presumably the destination encoding */
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_wchar_cp850, /* per-character conversion callback */
+       mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_cp850_wchar = {
+       mbfl_no_encoding_cp850, /* presumably the source encoding -- confirm against mbfl_convert_vtbl */
+       mbfl_no_encoding_wchar, /* presumably the destination encoding */
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_cp850_wchar, /* per-byte conversion callback */
+       mbfl_filt_conv_common_flush
+};
+
+#define CK(statement)  do { if ((statement) < 0) return (-1); } while (0) /* evaluate statement; a negative result makes the enclosing function return -1 */
+
+/*
+ * cp850 => wchar
+ *
+ * Values in [0, cp850_ucs_table_min) are forwarded unchanged.  Values
+ * from cp850_ucs_table_min up to 0xff are translated through
+ * cp850_ucs_table; a table entry that is <= 0 is replaced by the input
+ * value tagged with MBFL_WCSPLANE_CP850.  Every other value is forwarded
+ * tagged with MBFL_WCSGROUP_THROUGH.
+ *
+ * Returns c, or -1 when the output function reports an error.
+ */
+int
+mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter)
+{
+       int wc = c;     /* lower half: identity mapping */
+
+       if (c < 0 || c >= cp850_ucs_table_min) {
+               if (c >= cp850_ucs_table_min && c < 0x100) {
+                       wc = cp850_ucs_table[c - cp850_ucs_table_min];
+                       if (wc <= 0) {
+                               /* no usable table entry: keep the byte, tagged with the CP850 plane */
+                               wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_CP850;
+                       }
+               } else {
+                       /* not a single byte value: pass it through, tagged */
+                       wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
+               }
+       }
+
+       CK((*filter->output_function)(wc, filter->data));
+
+       return c;
+}
+
+/*
+ * wchar => cp850
+ *
+ * Values below 0x80 are used as they are.  Anything else is searched for
+ * in cp850_ucs_table, scanning from the last entry towards the first;
+ * the matching position plus cp850_ucs_table_min is the output byte.
+ * If nothing matched and c is tagged with MBFL_WCSPLANE_CP850, the low
+ * bits selected by MBFL_WCSPLANE_MASK are written instead.
+ *
+ * A value that still has no byte is handed to
+ * mbfl_filt_conv_illegal_output() unless filter->illegal_mode is
+ * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
+ *
+ * Returns c, or -1 when an output call reports an error.
+ */
+int
+mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter)
+{
+       int out = -1;   /* stays negative while no byte has been found */
+       int i;
+
+       if (c < 0x80) {
+               out = c;
+       } else {
+               for (i = cp850_ucs_table_len - 1; i >= 0; i--) {
+                       if (cp850_ucs_table[i] == c) {
+                               out = cp850_ucs_table_min + i;
+                               break;
+                       }
+               }
+               if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP850) {
+                       out = c & MBFL_WCSPLANE_MASK;
+               }
+       }
+
+       if (out < 0) {
+               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+                       CK(mbfl_filt_conv_illegal_output(c, filter));
+               }
+       } else {
+               CK((*filter->output_function)(out, filter->data));
+       }
+
+       return c;
+}
+
+/*
+ * Identify filter for CP850.
+ *
+ * mbfl_filt_conv_cp850_wchar() accepts every value 0x00-0xff as a CP850
+ * byte, so only values outside that range disqualify the input.  The
+ * previous test (c >= 0x80 && c < 0xff) rejected plain ASCII bytes and
+ * 0xff, and a later high byte reset an already raised flag to 0.
+ *
+ * filter->flag is only ever raised here, never cleared; it is assumed to
+ * start at 0 (presumably set by mbfl_filt_ident_common_ctor -- confirm).
+ *
+ * Returns c unchanged.
+ */
+static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter)
+{
+       if (c < 0 || c > 0xff)
+               filter->flag = 1; /* not it */
+       return c;
+}
+
+
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.h b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h
new file mode 100644 (file)
index 0000000..a1e0f9c
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Den V. Tsopa <tdv@edisoft.ru>
+ * Adaption for CP850: D. Giffeler <dg@artegic.de>
+ *
+ */
+
+#ifndef MBFL_MBFILTER_CP850_H
+#define MBFL_MBFILTER_CP850_H
+
+/*
+ * The declarations below use mbfl_encoding, the vtbl structs and
+ * mbfl_convert_filter.  Include mbfilter.h here, as the other filter
+ * headers do, so this header can be included on its own.
+ */
+#include "mbfilter.h"
+
+/* encoding descriptor and filter tables for CP850 */
+extern const mbfl_encoding mbfl_encoding_cp850;
+extern const struct mbfl_identify_vtbl vtbl_identify_cp850;
+extern const struct mbfl_convert_vtbl vtbl_wchar_cp850;
+extern const struct mbfl_convert_vtbl vtbl_cp850_wchar;
+
+/* per-unit conversion callbacks; both return c, or -1 on output error */
+int mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_CP850_H */
index bc07f13438580d0e4ca8fc6f556e80181f6f60db..ece0c7ee412e0903aab50a98c4b3b2a905690937 100644 (file)
@@ -204,6 +204,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
                        s = (c1 - 0xa1)*94 + c - 0xa1;
                        if (s >= 0 && s < jisx0212_ucs_table_size) {
                                w = jisx0212_ucs_table[s];
+                               if (w == 0x007e) {
+                                       w = 0xff5e;             /* FULLWIDTH TILDE */
+                               }
                        } else if (s >= (82*94) && s < (84*94)) {       /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */
                                s = (c1<< 8) | c;
                                w = 0;
@@ -222,6 +225,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
                        } else {
                                w = 0;
                        }
+                       if (w == 0x00A6) {
+                               w = 0xFFE4;             /* FULLWIDTH BROKEN BAR */
+                       }
                        if (w <= 0) {
                                w = ((c1 & 0x7f) << 8) | (c & 0x7f);
                                w &= MBFL_WCSPLANE_MASK;
@@ -274,6 +280,9 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
                c2 = s1%94 + 0xa1;
                s1 = (c1 << 8) | c2;
        }
+       if (s1 == 0xa2f1) {
+               s1 = 0x2d62;            /* NUMERO SIGN */
+       }
        if (s1 <= 0) {
                c1 = c & ~MBFL_WCSPLANE_MASK;
                if (c1 == MBFL_WCSPLANE_WINCP932) {
@@ -311,6 +320,8 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
                        s1 = 0x2172;
                } else if (c == 0xffe2) {       /* FULLWIDTH NOT SIGN */
                        s1 = 0x224c;
+               } else if (c == 0xff5e) {       /* FULLWIDTH TILDE */
+                       s1 = 0x2141;
                } else {
                        s1 = -1;
                        c1 = 0;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
new file mode 100644 (file)
index 0000000..df96167
--- /dev/null
@@ -0,0 +1,522 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter_ja.c
+ * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
+ * 
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_iso2022_jp_ms.h"
+
+#include "unicode_table_cp932_ext.h"
+#include "unicode_table_jis.h"
+#include "cp932_table.h"
+
+static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); /* forward declaration: vtbl_identify_2022jpms refers to it before its definition */
+
+static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; /* NULL-terminated list of alternative names */
+
+const mbfl_encoding mbfl_encoding_2022jpms = {
+       mbfl_no_encoding_2022jpms, /* encoding id */
+       "ISO-2022-JP-MS", /* presumably the canonical name -- confirm against mbfl_encoding */
+       "ISO-2022-JP", /* NOTE(review): presumably the MIME name; it differs from the name above -- confirm this is intended */
+       (const char *(*)[])&mbfl_encoding_2022jpms_aliases,
+       NULL, /* NOTE(review): the meaning of this slot is not visible in this file */
+       MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {
+       mbfl_no_encoding_2022jpms,
+       mbfl_filt_ident_common_ctor,
+       mbfl_filt_ident_common_dtor,
+       mbfl_filt_ident_2022jpms /* per-byte identify callback defined at the end of this file */
+};
+
+const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
+       mbfl_no_encoding_2022jpms, /* presumably the source encoding -- confirm against mbfl_convert_vtbl */
+       mbfl_no_encoding_wchar, /* presumably the destination encoding */
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_2022jpms_wchar, /* per-byte decoder */
+       mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = {
+       mbfl_no_encoding_wchar, /* presumably the source encoding -- confirm against mbfl_convert_vtbl */
+       mbfl_no_encoding_2022jpms, /* presumably the destination encoding */
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_wchar_2022jpms, /* per-character encoder */
+       mbfl_filt_conv_any_2022jpms_flush /* writes ESC ( B when a non-ASCII set is still selected */
+};
+
+#define CK(statement)  do { if ((statement) < 0) return (-1); } while (0) /* evaluate statement; a negative result makes the enclosing function return -1 */
+
+#define sjistoidx(c1, c2) /* linear index of a lead/trail byte pair, 188 cells per lead byte; lead bytes above 0x9f are rebased at 0xc1 instead of 0x81, trail bytes above 0x7e are shifted down by one */ \
+        (((c1) > 0x9f) \
+        ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \
+        : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)))
+#define idxtojis1(c) (((c) / 94) + 0x21) /* first byte: row of a 94-column grid, offset by 0x21 */
+#define idxtojis2(c) (((c) % 94) + 0x21) /* second byte: column of a 94-column grid, offset by 0x21 */
+
+/*
+ * ISO-2022-JP-MS => wchar
+ *
+ * Decodes one input unit per call.  filter->status combines two things:
+ *   - upper bits: the selected character set (0x00 ASCII, 0x20 X 0201
+ *     kana, 0x80 X 0208, 0xa0 UDC);
+ *   - low nibble: the position inside a sequence (0 idle, 1 second byte
+ *     of a two-byte character pending, 2 after ESC, 3 after "ESC $",
+ *     4 after "ESC $ (", 5 after "ESC (").
+ * filter->cache holds the first byte of a two-byte character.
+ *
+ * An escape sequence that is not recognised is written out literally
+ * and the byte that broke it is processed again from the idle position.
+ *
+ * Returns c, or -1 as soon as the output function reports an error.
+ */
+int
+mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter)
+{
+       int c1, s, w;
+
+retry:
+       switch (filter->status & 0xf) {
+/*     case 0x00:       ASCII */
+/*     case 0x10:       X 0201 latin */
+/*     case 0x20:       X 0201 kana */
+/*     case 0x80:       X 0208 */
+/*     case 0xa0:       UDC */
+       case 0:
+               if (c == 0x1b) {
+                       filter->status += 2; /* ESC seen: low nibble becomes 2 */
+               } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) {            /* kana */
+                       CK((*filter->output_function)(0xff40 + c, filter->data));
+               } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) {                /* kanji first char */
+                       filter->cache = c;
+                       filter->status += 1;
+               } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
+                       CK((*filter->output_function)(c, filter->data));
+               } else if (c > 0xa0 && c < 0xe0) {      /* GR kana */
+                       CK((*filter->output_function)(0xfec0 + c, filter->data));
+               } else {
+                       w = c & MBFL_WCSGROUP_MASK;
+                       w |= MBFL_WCSGROUP_THROUGH;
+                       CK((*filter->output_function)(w, filter->data));
+               }
+               break;
+
+/*     case 0x81:       X 0208 second char */
+/*     case 0xa1:       UDC second char */
+       case 1:
+               w = 0;
+               filter->status &= ~0xf; /* back to the idle position, character set kept */
+               c1 = filter->cache;
+               if (c > 0x20 && c < 0x7f) {
+                       s = (c1 - 0x21)*94 + c - 0x21; /* linear index into a 94-column grid */
+                       if (filter->status == 0x80) {
+                               if (s <= 137) {
+                                       if (s == 31) {
+                                               w = 0xff3c;                     /* FULLWIDTH REVERSE SOLIDUS */
+                                       } else if (s == 32) {
+                                               w = 0xff5e;                     /* FULLWIDTH TILDE */
+                                       } else if (s == 33) {
+                                               w = 0x2225;                     /* PARALLEL TO */
+                                       } else if (s == 60) {
+                                               w = 0xff0d;                     /* FULLWIDTH HYPHEN-MINUS */
+                                       } else if (s == 80) {
+                                               w = 0xffe0;                     /* FULLWIDTH CENT SIGN */
+                                       } else if (s == 81) {
+                                               w = 0xffe1;                     /* FULLWIDTH POUND SIGN */
+                                       } else if (s == 137) {
+                                               w = 0xffe2;                     /* FULLWIDTH NOT SIGN */
+                                       }
+                               }
+                               if (w == 0) {
+                                       if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {              /* vendor ext1 (13ku) */
+                                               w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
+                                       } else if (s >= 0 && s < jisx0208_ucs_table_size) {
+                                               w = jisx0208_ucs_table[s];
+                                       } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {               /* vendor ext2 (89ku - 92ku) */
+                                               w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
+                                       } else {
+                                               w = 0;
+                                       }
+                               }
+                               if (w <= 0) { /* no mapping: forward the raw byte pair tagged with the JIS0208 plane */
+                                       w = (c1 << 8) | c;
+                                       w &= MBFL_WCSPLANE_MASK;
+                                       w |= MBFL_WCSPLANE_JIS0208;
+                               }
+                               CK((*filter->output_function)(w, filter->data));
+                       } else {
+                               if (c1 > 0x20 && c1 < 0x35) {
+                                       w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; /* UDC first bytes 0x21-0x34 map linearly from 0xe000 */
+                               }
+                               if (w <= 0) {
+                                       w = (((c1 - 0x21) + 0x7f) << 8) | c;
+                                       w &= MBFL_WCSPLANE_MASK;
+                                       w |= MBFL_WCSPLANE_JIS0208;
+                               }
+                               CK((*filter->output_function)(w, filter->data));
+                       }
+               } else if (c == 0x1b) {
+                       filter->status += 2; /* ESC in place of the second byte: the cached first byte is dropped */
+               } else if ((c >= 0 && c < 0x21) || c == 0x7f) {         /* CTLs */
+                       CK((*filter->output_function)(c, filter->data));
+               } else {
+                       w = (c1 << 8) | c;
+                       w &= MBFL_WCSGROUP_MASK;
+                       w |= MBFL_WCSGROUP_THROUGH;
+                       CK((*filter->output_function)(w, filter->data));
+               }
+               break;
+
+       /* ESC */
+/*     case 0x02:      */
+/*     case 0x12:      */
+/*     case 0x22:      */
+/*     case 0x82:      */
+/*     case 0xa2:      */
+       case 2:
+               if (c == 0x24) {                /* '$' */
+                       filter->status++;
+               } else if (c == 0x28) {         /* '(' */
+                       filter->status += 3;
+               } else {
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data));
+                       goto retry; /* process c again from the idle position */
+               }
+               break;
+
+       /* ESC $ */
+/*     case 0x03:      */
+/*     case 0x13:      */
+/*     case 0x23:      */
+/*     case 0x83:      */
+/*     case 0xa3:      */
+       case 3:
+               if (c == 0x40 || c == 0x42) {   /* '@' or 'B' */
+                       filter->status = 0x80;
+               } else if (c == 0x28) {     /* '(' */
+                       filter->status++;
+               } else {
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data));
+                       CK((*filter->output_function)(0x24, filter->data));
+                       goto retry;
+               }
+               break;
+
+       /* ESC $ ( */
+/*     case 0x04:      */
+/*     case 0x14:      */
+/*     case 0x24:      */
+/*     case 0x84:      */
+/*     case 0xa4:      */
+       case 4:
+               if (c == 0x40 || c == 0x42) {   /* '@' or 'B' */
+                       filter->status = 0x80;
+               } else if (c == 0x3f) {                 /* '?' */
+                       filter->status = 0xa0;
+               } else {
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data));
+                       CK((*filter->output_function)(0x24, filter->data));
+                       CK((*filter->output_function)(0x28, filter->data));
+                       goto retry;
+               }
+               break;
+
+       /* ESC ( */
+/*     case 0x05:      */
+/*     case 0x15:      */
+/*     case 0x25:      */
+/*     case 0x85:      */
+/*     case 0xa5:      */
+       case 5:
+               if (c == 0x42) {                /* 'B' */
+                       filter->status = 0;
+               } else if (c == 0x4a) {         /* 'J' */
+                       filter->status = 0;
+               } else if (c == 0x49) {         /* 'I' */
+                       filter->status = 0x20;
+               } else {
+                       filter->status &= ~0xf;
+                       CK((*filter->output_function)(0x1b, filter->data));
+                       CK((*filter->output_function)(0x28, filter->data));
+                       goto retry;
+               }
+               break;
+
+       default:
+               filter->status = 0;
+               break;
+       }
+
+       return c;
+}
+
+/*
+ * Converts a position inside cp932ext3_ucs_table (c, counted from the
+ * cell 0xfa40) into a two-byte code: the position is turned into a
+ * linear index, moved into one of three target ranges depending on where
+ * it falls, and split into first/second byte with idxtojis1/idxtojis2.
+ * The result is (first byte << 8) | second byte.
+ */
+static int
+cp932ext3_cp932ext2_jis(int c)
+{
+       const int base = sjistoidx(0xfa, 0x40);
+       int pos = base + c;
+       int shift = 0;          /* distance between source and target range */
+
+       if (pos >= sjistoidx(0xfa, 0x5c)) {
+               shift = sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40);
+       } else if (pos >= sjistoidx(0xfa, 0x55)) {
+               shift = sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa);
+       } else if (pos >= base) {
+               shift = base - sjistoidx(0xee, 0xef);
+       }
+       pos -= shift;
+
+       return (idxtojis1(pos) << 8) | idxtojis2(pos);
+}
+
+/*
+ * wchar => ISO-2022-JP-MS
+ *
+ * Finds a code s1 for c in this order: the ucs_*_jis tables, the range
+ * 0xe000 .. 0xe000 + 20*94 - 1 (user defined characters), values tagged
+ * with one of the WINCP932 / JIS0208 / JIS0212 planes, a short list of
+ * special cases, and finally a linear search of the CP932 vendor
+ * extension tables.
+ *
+ * The bits of filter->status covered by 0xff00 record the character set
+ * announced last (0 ASCII, 0x100 kana, 0x200 X 0208, 0x800 UDC); an
+ * escape sequence is written only when the set has to change.
+ *
+ * A character without a code is handed to
+ * mbfl_filt_conv_illegal_output() unless filter->illegal_mode is
+ * MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE.
+ *
+ * Returns c, or -1 as soon as an output call reports an error.
+ */
+int
+mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
+{
+       int c1, c2, s1, s2;
+
+       s1 = 0;
+       s2 = 0;
+       if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
+               s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
+       } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
+               s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
+       } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
+               s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
+       } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
+               s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
+       } else if (c >= 0xe000 && c < (0xe000 + 20*94)) {       /* user  (95ku - 114ku) */
+               s1 = c - 0xe000;
+               c1 = s1/94 + 0x7f;
+               c2 = s1%94 + 0x21;
+               s1 = (c1 << 8) | c2;
+       }
+       if (s1 <= 0) {
+               c1 = c & ~MBFL_WCSPLANE_MASK;
+               if (c1 == MBFL_WCSPLANE_WINCP932) {
+                       s1 = c & MBFL_WCSPLANE_MASK;
+                       s2 = 1; /* exempts s1 from the "X 0212" rejection below */
+               } else if (c1 == MBFL_WCSPLANE_JIS0208) {
+                       s1 = c & MBFL_WCSPLANE_MASK;
+               } else if (c1 == MBFL_WCSPLANE_JIS0212) {
+                       s1 = c & MBFL_WCSPLANE_MASK;
+                       s1 |= 0x8080;
+               } else if (c == 0xa5) {         /* YEN SIGN */
+                       s1 = 0x216f;    /* FULLWIDTH YEN SIGN */
+               } else if (c == 0x203e) {       /* OVER LINE */
+                       s1 = 0x2131;    /* FULLWIDTH MACRON */
+               } else if (c == 0xff3c) {       /* FULLWIDTH REVERSE SOLIDUS */
+                       s1 = 0x2140;
+               } else if (c == 0xff5e) {       /* FULLWIDTH TILDE */
+                       s1 = 0x2141;
+               } else if (c == 0x2225) {       /* PARALLEL TO */
+                       s1 = 0x2142;
+               } else if (c == 0xff0d) {       /* FULLWIDTH HYPHEN-MINUS */
+                       s1 = 0x215d;
+               } else if (c == 0xffe0) {       /* FULLWIDTH CENT SIGN */
+                       s1 = 0x2171;
+               } else if (c == 0xffe1) {       /* FULLWIDTH POUND SIGN */
+                       s1 = 0x2172;
+               } else if (c == 0xffe2) {       /* FULLWIDTH NOT SIGN */
+                       s1 = 0x224c;
+               }
+       }
+       if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */
+               s1 = -1;
+               c1 = 0; /* from here on c1 is the search index and c2 the table length */
+               c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
+               while (c1 < c2) {               /* CP932 vendor ext1 (13ku) */
+                       if (c == cp932ext1_ucs_table[c1]) {
+                               s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
+                               break;
+                       }
+                       c1++;
+               }
+               if (s1 <= 0) {
+                       c1 = 0;
+                       c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min;
+                       while (c1 < c2) {               /* CP932 vendor ext3 (115ku - 119ku) */
+                               if (c == cp932ext3_ucs_table[c1]) {
+                                       s1 = cp932ext3_cp932ext2_jis(c1);
+                                       break;
+                               }
+                               c1++;
+                       }
+               }
+               if (c == 0) { /* code point 0 is always written as byte 0 */
+                       s1 = 0;
+               } else if (s1 <= 0) {
+                       s1 = -1;
+               }
+       }
+       if (s1 >= 0) {
+               if (s1 < 0x80) { /* latin */
+                       if ((filter->status & 0xff00) != 0) {
+                               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
+                               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
+                               CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
+                       }
+                       CK((*filter->output_function)(s1, filter->data));
+                       filter->status = 0;
+               } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */
+                       if ((filter->status & 0xff00) != 0x100) {
+                               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
+                               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
+                               CK((*filter->output_function)(0x49, filter->data));             /* 'I' */
+                       }
+                       filter->status = 0x100;
+                       CK((*filter->output_function)(s1 & 0x7f, filter->data));
+               } else if (s1 < 0x7e7f) { /* X 0208 */
+                       if ((filter->status & 0xff00) != 0x200) {
+                               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
+                               CK((*filter->output_function)(0x24, filter->data));             /* '$' */
+                               CK((*filter->output_function)(0x42, filter->data));             /* 'B' */
+                       }
+                       filter->status = 0x200;
+                       CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
+                       CK((*filter->output_function)(s1 & 0x7f, filter->data));
+               } else if (s1 < 0x927f) { /* UDC */
+                       if ((filter->status & 0xff00) != 0x800) {
+                               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
+                               CK((*filter->output_function)(0x24, filter->data));             /* '$' */
+                               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
+                               CK((*filter->output_function)(0x3f, filter->data));             /* '?' */
+                       }
+                       filter->status = 0x800;
+                       CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data));
+                       CK((*filter->output_function)(s1 & 0x7f, filter->data));
+               } /* NOTE(review): a code >= 0x927f writes nothing and reports no error -- confirm this is intended */
+       } else {
+               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+                       CK(mbfl_filt_conv_illegal_output(c, filter));
+               }
+       }
+
+       return c;
+}
+
+/*
+ * Flush for the wchar => ISO-2022-JP-MS encoder.
+ *
+ * When a character set other than ASCII is still announced (any bit of
+ * 0xff00 set in filter->status), writes ESC ( B to switch back, then
+ * clears those bits.  Returns 0, or -1 if the output function reports
+ * an error (the status is left untouched in that case).
+ */
+int
+mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
+{
+       static const int back_to_latin[3] = { 0x1b, 0x28, 0x42 };       /* ESC ( B */
+       int i;
+
+       if (filter->status & 0xff00) {
+               for (i = 0; i < 3; i++) {
+                       CK((*filter->output_function)(back_to_latin[i], filter->data));
+               }
+       }
+       filter->status &= 0xff;
+
+       return 0;
+}
+
+static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter)
+{      /*
+        * Identify filter for ISO-2022-JP-MS, fed one unit per call.
+        * filter->status keeps the selected set in its upper bits (0x20
+        * after "ESC ( I", 0x80 after "ESC $ @/B" or "ESC $ ( @/B", 0xa0
+        * after "ESC $ ( ?") and the position inside a sequence in its low
+        * nibble (1 second byte pending, 2 after ESC, 3 after "ESC $",
+        * 4 after "ESC $ (", 5 after "ESC (").
+        * filter->flag is set to 1 when c cannot continue a valid stream
+        * and is never cleared here.  Returns c unchanged.
+        */
+retry:
+       switch (filter->status & 0xf) {
+/*     case 0x00:       ASCII */
+/*     case 0x10:       X 0201 latin */
+/*     case 0x20:       X 0201 kana */
+/*     case 0x80:       X 0208 */
+/*     case 0xa0:       X UDC */
+       case 0:
+               if (c == 0x1b) {
+                       filter->status += 2;
+               } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) {                /* kanji first char */
+                       filter->status += 1;
+               } else if (c >= 0 && c < 0x80) {                /* latin, CTLs */
+                       ;
+               } else {
+                       filter->flag = 1;       /* bad */
+               }
+               break;
+
+/*     case 0x81:       X 0208 second char */
+/*     case 0xa1:       UDC second char */
+       case 1:
+               filter->status &= ~0xf;
+               if (c == 0x1b) {
+                       goto retry; /* ESC in place of a second byte: handle it from the idle position */
+               } else if (c < 0x21 || c > 0x7e) {              /* bad */
+                       filter->flag = 1;
+               }
+               break;
+
+       /* ESC */
+       case 2:
+               if (c == 0x24) {                /* '$' */
+                       filter->status++;
+               } else if (c == 0x28) {         /* '(' */
+                       filter->status += 3;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       /* ESC $ */
+       case 3:
+               if (c == 0x40 || c == 0x42) {           /* '@' or 'B' */
+                       filter->status = 0x80;
+               } else if (c == 0x28) {     /* '(' */
+                       filter->status++;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       /* ESC $ ( */
+       case 4:
+               if (c == 0x40 || c == 0x42) {           /* '@' or 'B' */
+                       filter->status = 0x80;
+               } else if (c == 0x3f) {         /* '?' */
+                       filter->status = 0xa0;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       /* ESC ( */
+       case 5:
+               if (c == 0x42) {                /* 'B' */
+                       filter->status = 0;
+               } else if (c == 0x4a) {         /* 'J' */
+                       filter->status = 0;
+               } else if (c == 0x49) {         /* 'I' */
+                       filter->status = 0x20;
+               } else {
+                       filter->flag = 1;       /* bad */
+                       filter->status &= ~0xf;
+                       goto retry;
+               }
+               break;
+
+       default:
+               filter->status = 0;
+               break;
+       }
+
+       return c;
+}
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h
new file mode 100644 (file)
index 0000000..8479a45
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter_ja.c
+ * by moriyoshi koizumi <moriyoshi@php.net> on 4 dec 2002.
+ * 
+ */
+
+#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H /* include guard */
+#define MBFL_MBFILTER_ISO2022_JP_MS_H
+
+#include "mbfilter.h" /* presumably supplies mbfl_encoding, the vtbl structs and mbfl_convert_filter -- TODO confirm */
+
+extern const mbfl_encoding mbfl_encoding_2022jpms; /* encoding descriptor */
+extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; /* identify-filter table */
+extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; /* conversion table; by its name ISO-2022-JP-MS => wchar */
+extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; /* conversion table; by its name wchar => ISO-2022-JP-MS */
+int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); /* per-unit conversion callback; c is one input unit */
+int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); /* per-unit conversion callback; c is one input unit */
+int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); /* flush callback referenced by vtbl_wchar_2022jpms -- confirm in the .c file */
+
+#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */
index 3657658ba859538657bea5e27dc290a352e71ce2..58336d4e284b33c7480deb17701d0d2aa8a6c1fb 100644 (file)
@@ -58,15 +58,6 @@ const mbfl_encoding mbfl_encoding_2022jp = {
        MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
 };
 
-const mbfl_encoding mbfl_encoding_2022jpms = {
-       mbfl_no_encoding_2022jpms,
-       "ISO-2022-JP-MS",
-       "ISO-2022-JP-MS",
-       NULL,
-       NULL,
-       MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
-};
-
 const struct mbfl_identify_vtbl vtbl_identify_jis = {
        mbfl_no_encoding_jis,
        mbfl_filt_ident_common_ctor,
@@ -81,13 +72,6 @@ const struct mbfl_identify_vtbl vtbl_identify_2022jp = {
        mbfl_filt_ident_2022jp
 };
 
-const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {
-       mbfl_no_encoding_2022jpms,
-       mbfl_filt_ident_common_ctor,
-       mbfl_filt_ident_common_dtor,
-       mbfl_filt_ident_2022jp
-};
-
 const struct mbfl_convert_vtbl vtbl_jis_wchar = {
        mbfl_no_encoding_jis,
        mbfl_no_encoding_wchar,
@@ -124,24 +108,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp = {
        mbfl_filt_conv_any_jis_flush
 };
 
-const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
-       mbfl_no_encoding_2022jpms,
-       mbfl_no_encoding_wchar,
-       mbfl_filt_conv_common_ctor,
-       mbfl_filt_conv_common_dtor,
-       mbfl_filt_conv_jis_wchar,
-       mbfl_filt_conv_common_flush
-};
-
-const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = {
-       mbfl_no_encoding_wchar,
-       mbfl_no_encoding_2022jpms,
-       mbfl_filt_conv_common_ctor,
-       mbfl_filt_conv_common_dtor,
-       mbfl_filt_conv_wchar_jis,
-       mbfl_filt_conv_any_jis_flush
-};
-
 #define CK(statement)  do { if ((statement) < 0) return (-1); } while (0)
 
 /*
@@ -164,10 +130,12 @@ retry:
                        filter->status += 2;
                } else if (c == 0x0e) {         /* "kana in" */
                        filter->status = 0x20;
-                       CK((*filter->output_function)(c, filter->data));
                } else if (c == 0x0f) {         /* "kana out" */
                        filter->status = 0;
-                       CK((*filter->output_function)(c, filter->data));
+               } else if (filter->status == 0x10 && c == 0x5c) {       /* YEN SIGN */
+                       CK((*filter->output_function)(0xa5, filter->data));
+               } else if (filter->status == 0x10 && c == 0x7e) {       /* OVER LINE */
+                       CK((*filter->output_function)(0x203e, filter->data));
                } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) {            /* kana */
                        CK((*filter->output_function)(0xff40 + c, filter->data));
                } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) {                /* kanji first char */
@@ -193,34 +161,9 @@ retry:
                        s = (c1 - 0x21)*94 + c - 0x21;
                        if (filter->status == 0x80) {
                                if (s >= 0 && s < jisx0208_ucs_table_size) {
-                                 if ((filter->from)->no_encoding != 
-                                     mbfl_no_encoding_2022jpms) {
-                                       w = jisx0208_ucs_table[s];
-                                 }
-                                 else {
-                                   if ((c1 - 0x21) == 12) {
-                                     w = cp932ext1_ucs_table[s-12*94];
-                                   }
-                                   else {
-                                     if (c1 >= 0x79 && c1 <= 0x7c) {
-                                       w = cp932ext2_ucs_table[s-(0x79-0x21)*94];
-                                     }
-                                     else {
                                        w = jisx0208_ucs_table[s];
-                                     }
-                                   }
-                                 }
                                } else {
-                                 if ((filter->from)->no_encoding != 
-                                     mbfl_no_encoding_2022jpms) {
-                                       w = 0;
-                                 } else {
-                                     if (c1 >= 0x79 && c1 <= 0x7c) {
-                                       w = cp932ext2_ucs_table[s-(0x79-0x21)*94];
-                                     } else {
                                        w = 0;
-                                     }
-                                 }
                                }
                                if (w <= 0) {
                                        w = (c1 << 8) | c;
@@ -344,7 +287,7 @@ retry:
 int
 mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
 {
-        int c1, c2, s;
+       int c1, s;
 
        s = 0;
        if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
@@ -356,18 +299,6 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
        } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
                s = ucs_r_jis_table[c - ucs_r_jis_table_min];
        }
-       if (s > 0x8080 && s < 0x10000 && 
-           ((filter->to)->no_encoding == mbfl_no_encoding_2022jpms)) {
-         c1 = 0;
-         c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
-         while (c1 < c2) {             /* CP932 vendor ext3 (115ku - 119ku) */
-           if (c == cp932ext2_ucs_table[c1]) {
-             s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
-             break;
-           }
-           c1++;
-         }
-       }
        if (s <= 0) {
                c1 = c & ~MBFL_WCSPLANE_MASK;
                if (c1 == MBFL_WCSPLANE_JIS0208) {
@@ -396,30 +327,8 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
                }
                if (c == 0) {
                        s = 0;
-               } else if (s <= 0 && ((filter->to)->no_encoding ==
-                                     mbfl_no_encoding_2022jpms)) {
+               } else if (s <= 0) {
                        s = -1;
-                       c1 = 0;
-                       c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
-                       while (c1 < c2) {               /* CP932 vendor ext1 (13ku) */
-                               if (c == cp932ext1_ucs_table[c1]) {
-                                       s = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
-                                       break;
-                               }
-                               c1++;
-                       }
-                       if (s < 0 && ((filter->to)->no_encoding ==
-                                     mbfl_no_encoding_2022jpms)) {
-                               c1 = 0;
-                               c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
-                               while (c1 < c2) {               /* CP932 vendor ext3 (115ku - 119ku) */
-                                       if (c == cp932ext2_ucs_table[c1]) {
-                                         s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
-                                         break;
-                                       }
-                                       c1++;
-                               }
-                       }
                }
        }
        if (s >= 0) {
@@ -483,7 +392,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
 int
 mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
 {
-        int c1, c2, s;
+       int s;
 
        s = 0;
        if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
@@ -519,47 +428,9 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
                        s = 0;
                } else if (s <= 0) {
                        s = -1;
-                       if ( (filter->to)->no_encoding ==
-                            mbfl_no_encoding_2022jpms) {
-                         c1 = 0;
-                         c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
-                         while (c1 < c2) {             /* CP932 vendor ext1 (13ku) */
-                           if (c == cp932ext1_ucs_table[c1]) {
-                             s = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
-                             break;
-                           }
-                           c1++;
-                         }
-                         if ((filter->to)->no_encoding ==
-                             mbfl_no_encoding_2022jpms) {
-                           c1 = 0;
-                           c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
-                           while (c1 < c2) {           /* CP932 vendor ext3 (115ku - 119ku) */
-                             if (c == cp932ext2_ucs_table[c1]) {
-                               s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
-                               break;
-                             }
-                             c1++;
-                           }
-                         }
-                       }
                }
-       } else if (((s >= 0x80 && s < 0x2121) &&
-                   (filter->to)->no_encoding != mbfl_no_encoding_2022jpms) || 
-                  (s > 0x8080)) {
+       } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
                s = -1;
-               if ((filter->to)->no_encoding ==
-                   mbfl_no_encoding_2022jpms) {
-                 c1 = 0;
-                 c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
-                 while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
-                   if (c == cp932ext2_ucs_table[c1]) {
-                     s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
-                     break;
-                   }
-                   c1++;
-                 }
-               }
        }
        if (s >= 0) {
                if (s < 0x80) { /* ASCII */
@@ -570,15 +441,6 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
                        }
                        filter->status = 0;
                        CK((*filter->output_function)(s, filter->data));
-               } else if (s < 0x100 && ((filter->to)->no_encoding ==
-                                     mbfl_no_encoding_2022jpms)) { /* kana */
-                       if ((filter->status & 0xff00) != 0x100) {
-                               CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
-                               CK((*filter->output_function)(0x28, filter->data));             /* '(' */
-                               CK((*filter->output_function)(0x49, filter->data));             /* 'I' */
-                       }
-                       filter->status = 0x100;
-                       CK((*filter->output_function)(s & 0x7f, filter->data));
                } else if (s < 0x10000) { /* X 0208 */
                        if ((filter->status & 0xff00) != 0x200) {
                                CK((*filter->output_function)(0x1b, filter->data));             /* ESC */
index 1ba244a282ffc2714b0dd4438be083212aacdf6c..037530726375bdfef6bef0ec3a93dafc1a340b75 100644 (file)
 
 extern const mbfl_encoding mbfl_encoding_jis;
 extern const mbfl_encoding mbfl_encoding_2022jp;
-extern const mbfl_encoding mbfl_encoding_2022jpms;
 extern const struct mbfl_identify_vtbl vtbl_identify_2022jp;
-extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms;
 extern const struct mbfl_identify_vtbl vtbl_identify_jis;
 extern const struct mbfl_convert_vtbl vtbl_jis_wchar;
 extern const struct mbfl_convert_vtbl vtbl_wchar_jis;
 extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar;
 extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp;
-extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar;
-extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms;
  
 int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter);
 int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter);
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c
new file mode 100644 (file)
index 0000000..9b8f450
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Maksym Veremeyenko <verem@m1.tv>
+ *
+ * Based on mbfilter_koi8r.c code
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_koi8u.h"
+#include "unicode_table_koi8u.h"
+
+static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter);
+
+static const char *mbfl_encoding_koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL};
+
+const mbfl_encoding mbfl_encoding_koi8u = {
+       mbfl_no_encoding_koi8u,
+       "KOI8-U",
+       "KOI8-U",
+       (const char *(*)[])&mbfl_encoding_koi8u_aliases,
+       NULL,
+       MBFL_ENCTYPE_SBCS
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_koi8u = {
+       mbfl_no_encoding_koi8u,
+       mbfl_filt_ident_common_ctor,
+       mbfl_filt_ident_common_dtor,
+       mbfl_filt_ident_koi8u
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_koi8u = {
+       mbfl_no_encoding_wchar,
+       mbfl_no_encoding_koi8u,
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_wchar_koi8u,
+       mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_koi8u_wchar = {
+       mbfl_no_encoding_koi8u,
+       mbfl_no_encoding_wchar,
+       mbfl_filt_conv_common_ctor,
+       mbfl_filt_conv_common_dtor,
+       mbfl_filt_conv_koi8u_wchar,
+       mbfl_filt_conv_common_flush
+};
+
+#define CK(statement)  do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * koi8u => wchar
+ */
+int
+mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter)
+{
+       int s;
+
+       if (c >= 0 && c < koi8u_ucs_table_min) {
+               s = c;
+       } else if (c >= koi8u_ucs_table_min && c < 0x100) {
+               s = koi8u_ucs_table[c - koi8u_ucs_table_min];
+               if (s <= 0) {
+                       s = c;
+                       s &= MBFL_WCSPLANE_MASK;
+                       s |= MBFL_WCSPLANE_KOI8U;
+               }
+       } else {
+               s = c;
+               s &= MBFL_WCSGROUP_MASK;
+               s |= MBFL_WCSGROUP_THROUGH;
+       }
+
+       CK((*filter->output_function)(s, filter->data));
+
+       return c;
+}
+
+/*
+ * wchar => koi8u
+ */
+int
+mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter)
+{
+       int s, n;
+
+       if (c < 0x80) {
+               s = c;
+       } else {
+               s = -1;
+               n = koi8u_ucs_table_len-1;
+               while (n >= 0) {
+                       if (c == koi8u_ucs_table[n]) {
+                               s = koi8u_ucs_table_min + n;
+                               break;
+                       }
+                       n--;
+               }
+               if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8U) {
+                       s = c & MBFL_WCSPLANE_MASK;
+               }
+       }
+
+       if (s >= 0) {
+               CK((*filter->output_function)(s, filter->data));
+       } else {
+               if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+                       CK(mbfl_filt_conv_illegal_output(c, filter));
+               }
+       }
+
+       return c;
+}
+
+static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter)
+{
+       if (c >= 0x80 && c < 0xff)
+               filter->flag = 0;
+       else
+               filter->flag = 1; /* not it */
+       return c;       
+}
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h
new file mode 100644 (file)
index 0000000..693ade3
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Maksym Veremeyenko <verem@m1.tv>
+ *
+ * Based on mbfilter_koi8r.h code
+ *
+ */
+
+#ifndef MBFL_MBFILTER_KOI8U_H
+#define MBFL_MBFILTER_KOI8U_H
+
+#include "mbfilter.h"
+
+extern const mbfl_encoding mbfl_encoding_koi8u;
+extern const struct mbfl_identify_vtbl vtbl_identify_koi8u;
+extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u;
+extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar;
+
+int mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_KOI8U_H */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h
new file mode 100644 (file)
index 0000000..644053c
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The authors of this file: PHP3 internationalization team
+ * You can contact the primary author 金本 茂 <sgk@happysize.co.jp>.
+ *
+ */
+
+#ifndef UNICODE_TABLE_CP1254_H
+#define UNICODE_TABLE_CP1254_H
+/* cp1254 to Unicode table */
+static const unsigned short cp1254_ucs_table[] = {
+ 0x20ac, 0xfffe, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, 
+ 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffe, 0xfffe, 0xfffe, 
+ 0xfffe, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 
+ 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffe, 0xfffe, 0x0178, 
+ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 
+ 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 
+ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 
+ 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 
+ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 
+ 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 
+ 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 
+ 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, 
+ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 
+ 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, 
+ 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 
+ 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff
+};
+static const int cp1254_ucs_table_min = 0x80;
+static const int cp1254_ucs_table_len = (sizeof (cp1254_ucs_table) / sizeof (unsigned short));
+static const int cp1254_ucs_table_max = 0x80 + (sizeof (cp1254_ucs_table) / sizeof (unsigned short));
+
+#endif /* UNICODE_TABLE_CP1254_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp850.h b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h
new file mode 100644 (file)
index 0000000..6c60ae0
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Den V. Tsopa <tdv@edisoft.ru>
+ * Adaption for CP850: D. Giffeler <dg@artegic.de>
+ *
+ */
+
+#ifndef UNICODE_TABLE_CP850_H
+#define UNICODE_TABLE_CP850_H
+
+/* cp850_DOS to Unicode table */
+static const unsigned short cp850_ucs_table[] = {
+  0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7
+, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5
+, 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9
+, 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192
+, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba
+, 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb
+, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0
+, 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510
+, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3
+, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4
+, 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce
+, 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580
+, 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe
+, 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4
+, 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8
+, 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0
+};
+static const int cp850_ucs_table_min = 0x80;
+static const int cp850_ucs_table_len = (sizeof (cp850_ucs_table) / sizeof (unsigned short));
+static const int cp850_ucs_table_max = 0x80 + (sizeof (cp850_ucs_table) / sizeof (unsigned short));
+
+#endif /* UNICODE_TABLE_CP850_H */
index e87dad93c8acedb1d15520537321ff627e9dd160..5671c4e8513a0af702b05a8c2fc19d3343a7c133 100644 (file)
@@ -36,13 +36,13 @@ static const unsigned short jisx0208_ucs_table[] = {
  0xFF1F,0xFF01,0x309B,0x309C,0x00B4,0xFF40,0x00A8,0xFF3E,
  0xFFE3,0xFF3F,0x30FD,0x30FE,0x309D,0x309E,0x3003,0x4EDD,
  0x3005,0x3006,0x3007,0x30FC,0x2015,0x2010,0xFF0F,0xFF3C,
- 0xFF5E,0x2225,0xFF5C,0x2026,0x2025,0x2018,0x2019,0x201C,
+ 0x301C,0x2016,0xFF5C,0x2026,0x2025,0x2018,0x2019,0x201C,
  0x201D,0xFF08,0xFF09,0x3014,0x3015,0xFF3B,0xFF3D,0xFF5B,
  0xFF5D,0x3008,0x3009,0x300A,0x300B,0x300C,0x300D,0x300E,
- 0x300F,0x3010,0x3011,0xFF0B,0xFF0D,0x00B1,0x00D7,0x00F7,
+ 0x300F,0x3010,0x3011,0xFF0B,0x2212,0x00B1,0x00D7,0x00F7,
  0xFF1D,0x2260,0xFF1C,0xFF1E,0x2266,0x2267,0x221E,0x2234,
  0x2642,0x2640,0x00B0,0x2032,0x2033,0x2103,0xFFE5,0xFF04,
- 0xFFE0,0xFFE1,0xFF05,0xFF03,0xFF06,0xFF0A,0xFF20,0x00A7,
+ 0x00A2,0x00A3,0xFF05,0xFF03,0xFF06,0xFF0A,0xFF20,0x00A7,
  0x2606,0x2605,0x25CB,0x25CF,0x25CE,0x25C7,
 
  /* ku 2 */
@@ -51,7 +51,7 @@ static const unsigned short jisx0208_ucs_table[] = {
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x2208,0x220B,0x2286,0x2287,0x2282,0x2283,0x222A,
  0x2229,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x2227,0x2228,0xFFE2,0x21D2,0x21D4,0x2200,0x2203,
+ 0x0000,0x2227,0x2228,0x00AC,0x21D2,0x21D4,0x2200,0x2203,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x2220,0x22A5,0x2312,0x2202,0x2207,
  0x2261,0x2252,0x226A,0x226B,0x221A,0x223D,0x221D,0x2235,
@@ -1217,9 +1217,9 @@ static const unsigned short jisx0212_ucs_table[] = {
  /* ku 2 */
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x02D8,0x02C7,
- 0x00B8,0x02D9,0x02DD,0x00AF,0x02DB,0x02DA,0xFF5E,0x0384,
+ 0x00B8,0x02D9,0x02DD,0x00AF,0x02DB,0x02DA,0x007E,0x0384,
  0x0385,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x00A1,0xFFE4,0x00BF,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x00A1,0x00A6,0x00BF,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
@@ -2471,7 +2471,7 @@ static const unsigned short ucs_a2_jis_table[] = {
  /* 2100h */
  0x0000,0x0000,0x0000,0x216E,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x2d62,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xA2F1,0x0000,
  0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0xA2EF,0x0000,0x0000,0x0000,0x0000,0x0000,
  0x0000,0x0000,0x0000,0x2272,0x0000,0x0000,0x0000,0x0000,
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h b/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h
new file mode 100644 (file)
index 0000000..f3a4e9d
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public 
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA  02111-1307  USA
+ *
+ * The author of this part: Maksym Veremeyenko <verem@m1.tv>
+ *
+ */
+
+#ifndef UNICODE_TABLE_KOI8U_H
+#define UNICODE_TABLE_KOI8U_H
+
+/* KOI8-U (RFC2319) to Unicode */
+static const unsigned short koi8u_ucs_table[] = {
+ 0x2500,    /* BOX DRAWINGS  LIGHT HORIZONTAL  */ 
+ 0x2502,    /* BOX DRAWINGS  LIGHT VERTICAL */ 
+ 0x250C,    /* BOX DRAWINGS  LIGHT DOWN AND RIGHT */ 
+ 0x2510,    /* BOX DRAWINGS  LIGHT DOWN AND LEFT */ 
+ 0x2514,    /* BOX DRAWINGS  LIGHT UP AND RIGHT */ 
+ 0x2518,    /* BOX DRAWINGS  LIGHT UP AND LEFT */ 
+ 0x251C,    /* BOX DRAWINGS  LIGHT VERTICAL AND RIGHT */ 
+ 0x2524,    /* BOX DRAWINGS  LIGHT VERTICAL AND LEFT */ 
+ 0x252C,    /* BOX DRAWINGS  LIGHT DOWN AND HORIZONTAL */ 
+ 0x2534,    /* BOX DRAWINGS  LIGHT UP AND HORIZONTAL */ 
+ 0x253C,    /* BOX DRAWINGS  LIGHT VERTICAL AND HORIZONTAL */ 
+ 0x2580,    /* UPPER HALF BLOCK */ 
+ 0x2584,    /* LOWER HALF BLOCK */ 
+ 0x2588,    /* FULL BLOCK */ 
+ 0x258C,    /* LEFT HALF BLOCK */ 
+ 0x2590,    /* RIGHT HALF BLOCK */ 
+ 0x2591,    /* LIGHT SHADE */ 
+ 0x2592,    /* MEDIUM SHADE */ 
+ 0x2593,    /* DARK SHADE */ 
+ 0x2320,    /* TOP HALF INTEGRAL */ 
+ 0x25A0,    /* BLACK SQUARE */ 
+ 0x2219,    /* BULLET OPERATOR */ 
+ 0x221A,    /* SQUARE ROOT */ 
+ 0x2248,    /* ALMOST EQUAL TO */ 
+ 0x2264,    /* LESS THAN OR EQUAL TO */ 
+ 0x2265,    /* GREATER THAN OR EQUAL TO */ 
+ 0x00A0,    /* NO-BREAK SPACE */ 
+ 0x2321,    /* BOTTOM HALF INTEGRAL */ 
+ 0x00B0,    /* DEGREE SIGN */ 
+ 0x00B2,    /* SUPERSCRIPT TWO */ 
+ 0x00B7,    /* MIDDLE DOT */ 
+ 0x00F7,    /* DIVISION SIGN */ 
+ 0x2550,    /* BOX DRAWINGS  DOUBLE HORIZONTAL */ 
+ 0x2551,    /* BOX DRAWINGS  DOUBLE VERTICAL */ 
+ 0x2552,    /* BOX DRAWINGS  DOWN SINGLE AND RIGHT DOUBLE */ 
+ 0x0451,    /* CYRILLIC SMALL LETTER IO */ 
+ 0x0454,    /* CYRILLIC SMALL LETTER UKRAINIAN IE */ 
+ 0x2554,    /* BOX DRAWINGS  DOUBLE DOWN AND RIGHT */ 
+ 0x0456,    /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ 
+ 0x0457,    /* CYRILLIC SMALL LETTER YI (UKRAINIAN) */ 
+ 0x2557,    /* BOX DRAWINGS  DOUBLE DOWN AND LEFT */ 
+ 0x2558,    /* BOX DRAWINGS  UP SINGLE AND RIGHT DOUBLE */ 
+ 0x2559,    /* BOX DRAWINGS  UP DOUBLE AND RIGHT SINGLE */ 
+ 0x255A,    /* BOX DRAWINGS  DOUBLE UP AND RIGHT */ 
+ 0x255B,    /* BOX DRAWINGS  UP SINGLE AND LEFT DOUBLE */ 
+ 0x0491,    /* CYRILLIC SMALL LETTER GHE WITH UPTURN */ 
+ 0x255D,    /* BOX DRAWINGS  DOUBLE UP AND LEFT */ 
+ 0x255E,    /* BOX DRAWINGS  VERTICAL SINGLE AND RIGHT DOUBLE */ 
+ 0x255F,    /* BOX DRAWINGS  VERTICAL DOUBLE AND RIGHT SINGLE */ 
+ 0x2560,    /* BOX DRAWINGS  DOUBLE VERTICAL AND RIGHT */ 
+ 0x2561,    /* BOX DRAWINGS  VERTICAL SINGLE AND LEFT DOUBLE */ 
+ 0x0401,    /* CYRILLIC CAPITAL LETTER IO */ 
+ 0x0404,    /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */ 
+ 0x2563,    /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */ 
+ 0x0406,    /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */ 
+ 0x0407,    /* CYRILLIC CAPITAL LETTER YI (UKRAINIAN) */ 
+ 0x2566,    /* BOX DRAWINGS  DOUBLE DOWN AND HORIZONTAL */ 
+ 0x2567,    /* BOX DRAWINGS  UP SINGLE AND HORIZONTAL DOUBLE */ 
+ 0x2568,    /* BOX DRAWINGS  UP DOUBLE AND HORIZONTAL SINGLE */ 
+ 0x2569,    /* BOX DRAWINGS  DOUBLE UP AND HORIZONTAL */ 
+ 0x256A,    /* BOX DRAWINGS  VERTICAL SINGLE AND HORIZONTAL DOUBLE */ 
+ 0x0490,    /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */ 
+ 0x256C,    /* BOX DRAWINGS  DOUBLE VERTICAL AND HORIZONTAL */ 
+ 0x00A9,    /* COPYRIGHT SIGN */ 
+ 0x044E,    /* CYRILLIC SMALL LETTER YU */ 
+ 0x0430,    /* CYRILLIC SMALL LETTER A */ 
+ 0x0431,    /* CYRILLIC SMALL LETTER BE */ 
+ 0x0446,    /* CYRILLIC SMALL LETTER TSE */ 
+ 0x0434,    /* CYRILLIC SMALL LETTER DE */ 
+ 0x0435,    /* CYRILLIC SMALL LETTER IE */ 
+ 0x0444,    /* CYRILLIC SMALL LETTER EF */ 
+ 0x0433,    /* CYRILLIC SMALL LETTER GHE */ 
+ 0x0445,    /* CYRILLIC SMALL LETTER KHA */ 
+ 0x0438,    /* CYRILLIC SMALL LETTER I */ 
+ 0x0439,    /* CYRILLIC SMALL LETTER SHORT I */ 
+ 0x043A,    /* CYRILLIC SMALL LETTER KA */ 
+ 0x043B,    /* CYRILLIC SMALL LETTER EL */ 
+ 0x043C,    /* CYRILLIC SMALL LETTER EM */ 
+ 0x043D,    /* CYRILLIC SMALL LETTER EN */ 
+ 0x043E,    /* CYRILLIC SMALL LETTER O */ 
+ 0x043F,    /* CYRILLIC SMALL LETTER PE */ 
+ 0x044F,    /* CYRILLIC SMALL LETTER YA */ 
+ 0x0440,    /* CYRILLIC SMALL LETTER ER */ 
+ 0x0441,    /* CYRILLIC SMALL LETTER ES */ 
+ 0x0442,    /* CYRILLIC SMALL LETTER TE */ 
+ 0x0443,    /* CYRILLIC SMALL LETTER U */ 
+ 0x0436,    /* CYRILLIC SMALL LETTER ZHE */ 
+ 0x0432,    /* CYRILLIC SMALL LETTER VE */ 
+ 0x044C,    /* CYRILLIC SMALL LETTER SOFT SIGN */ 
+ 0x044B,    /* CYRILLIC SMALL LETTER YERU */ 
+ 0x0437,    /* CYRILLIC SMALL LETTER ZE */ 
+ 0x0448,    /* CYRILLIC SMALL LETTER SHA */ 
+ 0x044D,    /* CYRILLIC SMALL LETTER E */ 
+ 0x0449,    /* CYRILLIC SMALL LETTER SHCHA */ 
+ 0x0447,    /* CYRILLIC SMALL LETTER CHE */ 
+ 0x044A,    /* CYRILLIC SMALL LETTER HARD SIGN */ 
+ 0x042E,    /* CYRILLIC CAPITAL LETTER YU */ 
+ 0x0410,    /* CYRILLIC CAPITAL LETTER A */ 
+ 0x0411,    /* CYRILLIC CAPITAL LETTER BE */ 
+ 0x0426,    /* CYRILLIC CAPITAL LETTER TSE */ 
+ 0x0414,    /* CYRILLIC CAPITAL LETTER DE */ 
+ 0x0415,    /* CYRILLIC CAPITAL LETTER IE */ 
+ 0x0424,    /* CYRILLIC CAPITAL LETTER EF */ 
+ 0x0413,    /* CYRILLIC CAPITAL LETTER GHE */ 
+ 0x0425,    /* CYRILLIC CAPITAL LETTER KHA */ 
+ 0x0418,    /* CYRILLIC CAPITAL LETTER I */ 
+ 0x0419,    /* CYRILLIC CAPITAL LETTER SHORT I */ 
+ 0x041A,    /* CYRILLIC CAPITAL LETTER KA */ 
+ 0x041B,    /* CYRILLIC CAPITAL LETTER EL */ 
+ 0x041C,    /* CYRILLIC CAPITAL LETTER EM */ 
+ 0x041D,    /* CYRILLIC CAPITAL LETTER EN */ 
+ 0x041E,    /* CYRILLIC CAPITAL LETTER O */ 
+ 0x041F,    /* CYRILLIC CAPITAL LETTER PE */ 
+ 0x042F,    /* CYRILLIC CAPITAL LETTER YA */ 
+ 0x0420,    /* CYRILLIC CAPITAL LETTER ER */ 
+ 0x0421,    /* CYRILLIC CAPITAL LETTER ES */ 
+ 0x0422,    /* CYRILLIC CAPITAL LETTER TE */ 
+ 0x0423,    /* CYRILLIC CAPITAL LETTER U */ 
+ 0x0416,    /* CYRILLIC CAPITAL LETTER ZHE */ 
+ 0x0412,    /* CYRILLIC CAPITAL LETTER VE */ 
+ 0x042C,    /* CYRILLIC CAPITAL LETTER SOFT SIGN */ 
+ 0x042B,    /* CYRILLIC CAPITAL LETTER YERU */ 
+ 0x0417,    /* CYRILLIC CAPITAL LETTER ZE */ 
+ 0x0428,    /* CYRILLIC CAPITAL LETTER SHA */ 
+ 0x042D,    /* CYRILLIC CAPITAL LETTER E */ 
+ 0x0429,    /* CYRILLIC CAPITAL LETTER SHCHA */ 
+ 0x0427,    /* CYRILLIC CAPITAL LETTER CHE */ 
+ 0x042A     /* CYRILLIC CAPITAL LETTER HARD SIGN */
+};
+static const int koi8u_ucs_table_min = 0x80;
+static const int koi8u_ucs_table_len = (sizeof (koi8u_ucs_table) / sizeof (unsigned short));
+static const int koi8u_ucs_table_max = 0x80 + (sizeof (koi8u_ucs_table) / sizeof (unsigned short));
+
+
+
+#endif /* UNICODE_TABLE_KOI8U_H */
+
index ddb4f9003221890db10b125061be2f1350e67a7f..a6d9cfc7207126ccdb06cee1b6c368802f48e6bb 100644 (file)
@@ -243,6 +243,10 @@ SOURCE=.\filters\mbfilter_koi8r.c
 # End Source File\r
 # Begin Source File\r
 \r
+SOURCE=.\filters\mbfilter_koi8u.c\r
+# End Source File\r
+# Begin Source File\r
+\r
 SOURCE=.\filters\mbfilter_armscii8.c\r
 # End Source File\r
 # Begin Source File\r
@@ -556,6 +560,10 @@ SOURCE=.\filters\mbfilter_koi8r.h
 # End Source File\r
 # Begin Source File\r
 \r
+SOURCE=.\filters\mbfilter_koi8u.h\r
+# End Source File\r
+# Begin Source File\r
+\r
 SOURCE=.\filters\mbfilter_armscii8.h\r
 # End Source File\r
 # Begin Source File\r
@@ -776,6 +784,10 @@ SOURCE=.\filters\unicode_table_koi8r.h
 # End Source File\r
 # Begin Source File\r
 \r
+SOURCE=.\filters\unicode_table_koi8u.h\r
+# End Source File\r
+# Begin Source File\r
+\r
 SOURCE=.\filters\unicode_table_armscii8.h\r
 # End Source File\r
 # Begin Source File\r
index f49f0c0d868ade7571cf1a65ecec8ab9f867890e..becef513a55c0f86c1b4025a3a42791fbc390211 100755 (executable)
@@ -1,21 +1,19 @@
-Microsoft Visual Studio Solution File, Format Version 7.00
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual C++ Express 2008
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
 EndProject
 Global
-       GlobalSection(SolutionConfiguration) = preSolution
-               ConfigName.0 = Debug
-               ConfigName.1 = Release
+       GlobalSection(SolutionConfigurationPlatforms) = preSolution
+               Debug|Win32 = Debug|Win32
+               Release|Win32 = Release|Win32
        EndGlobalSection
-       GlobalSection(ProjectDependencies) = postSolution
+       GlobalSection(ProjectConfigurationPlatforms) = postSolution
+               {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.ActiveCfg = Debug|Win32
+               {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.Build.0 = Debug|Win32
+               {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.ActiveCfg = Release|Win32
+               {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.Build.0 = Release|Win32
        EndGlobalSection
-       GlobalSection(ProjectConfiguration) = postSolution
-               {B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
-               {B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
-               {B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
-               {B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
-       EndGlobalSection
-       GlobalSection(ExtensibilityGlobals) = postSolution
-       EndGlobalSection
-       GlobalSection(ExtensibilityAddIns) = postSolution
+       GlobalSection(SolutionProperties) = preSolution
+               HideSolutionNode = FALSE
        EndGlobalSection
 EndGlobal
index 29e0af0a270625913b6b888bc346c59191a9b0c6..0111012d6590c9b2f7effe5ce0d4c8e87bdcd9cb 100755 (executable)
@@ -1,24 +1,50 @@
-<?xml version="1.0" encoding = "shift_jis"?>
+<?xml version="1.0" encoding="shift_jis"?>
 <VisualStudioProject
        ProjectType="Visual C++"
-       Version="7.00"
+       Version="9.00"
        Name="libmbfl"
        ProjectGUID="{B3636594-A785-4270-A765-8EAE922B5207}"
-       SccProjectName=""
-       SccLocalPath="">
+       RootNamespace="libmbfl"
+       TargetFrameworkVersion="131072"
+       >
        <Platforms>
                <Platform
-                       Name="Win32"/>
+                       Name="Win32"
+               />
        </Platforms>
+       <ToolFiles>
+       </ToolFiles>
        <Configurations>
                <Configuration
                        Name="Debug|Win32"
                        OutputDirectory=".\Debug"
                        IntermediateDirectory=".\Debug"
                        ConfigurationType="2"
+                       InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC70.vsprops"
                        UseOfMFC="0"
-                       ATLMinimizesCRunTimeLibraryUsage="FALSE"
-                       CharacterSet="2">
+                       ATLMinimizesCRunTimeLibraryUsage="false"
+                       CharacterSet="2"
+                       >
+                       <Tool
+                               Name="VCPreBuildEventTool"
+                       />
+                       <Tool
+                               Name="VCCustomBuildTool"
+                       />
+                       <Tool
+                               Name="VCXMLDataGeneratorTool"
+                       />
+                       <Tool
+                               Name="VCWebServiceProxyGeneratorTool"
+                       />
+                       <Tool
+                               Name="VCMIDLTool"
+                               PreprocessorDefinitions="_DEBUG"
+                               MkTypLibCompatible="true"
+                               SuppressStartupBanner="true"
+                               TargetEnvironment="1"
+                               TypeLibraryName=".\Debug/mbfl.tlb"
+                       />
                        <Tool
                                Name="VCCLCompilerTool"
                                Optimization="0"
                                PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;MBFL_DLL_EXPORT;HAVE_CONFIG_H=1"
                                BasicRuntimeChecks="3"
                                RuntimeLibrary="1"
-                               UsePrecompiledHeader="2"
+                               UsePrecompiledHeader="0"
                                PrecompiledHeaderFile=".\Debug/mbfl.pch"
                                AssemblerListingLocation=".\Debug/"
                                ObjectFile=".\Debug/"
                                ProgramDataBaseFileName=".\Debug/"
                                WarningLevel="3"
-                               SuppressStartupBanner="TRUE"
+                               SuppressStartupBanner="true"
                                DebugInformationFormat="4"
-                               CompileAs="0"/>
+                               CompileAs="0"
+                       />
                        <Tool
-                               Name="VCCustomBuildTool"/>
+                               Name="VCManagedResourceCompilerTool"
+                       />
+                       <Tool
+                               Name="VCResourceCompilerTool"
+                               PreprocessorDefinitions="_DEBUG"
+                               Culture="1041"
+                       />
+                       <Tool
+                               Name="VCPreLinkEventTool"
+                       />
                        <Tool
                                Name="VCLinkerTool"
                                AdditionalOptions="/MACHINE:I386"
                                AdditionalDependencies="odbc32.lib odbccp32.lib"
                                OutputFile=".\Debug/mbfl.dll"
                                LinkIncremental="2"
-                               SuppressStartupBanner="TRUE"
+                               SuppressStartupBanner="true"
                                ModuleDefinitionFile=""
-                               GenerateDebugInformation="TRUE"
+                               GenerateDebugInformation="true"
                                ProgramDatabaseFile=".\Debug/mbfl.pdb"
-                               ImportLibrary=".\Debug/mbfl.lib"/>
+                               RandomizedBaseAddress="1"
+                               DataExecutionPrevention="0"
+                               ImportLibrary=".\Debug/mbfl.lib"
+                       />
                        <Tool
-                               Name="VCMIDLTool"
-                               PreprocessorDefinitions="_DEBUG"
-                               MkTypLibCompatible="TRUE"
-                               SuppressStartupBanner="TRUE"
-                               TargetEnvironment="1"
-                               TypeLibraryName=".\Debug/mbfl.tlb"/>
+                               Name="VCALinkTool"
+                       />
                        <Tool
-                               Name="VCPostBuildEventTool"/>
+                               Name="VCManifestTool"
+                       />
                        <Tool
-                               Name="VCPreBuildEventTool"/>
+                               Name="VCXDCMakeTool"
+                       />
                        <Tool
-                               Name="VCPreLinkEventTool"/>
+                               Name="VCBscMakeTool"
+                       />
                        <Tool
-                               Name="VCResourceCompilerTool"
-                               PreprocessorDefinitions="_DEBUG"
-                               Culture="1041"/>
+                               Name="VCFxCopTool"
+                       />
                        <Tool
-                               Name="VCWebServiceProxyGeneratorTool"/>
+                               Name="VCAppVerifierTool"
+                       />
                        <Tool
-                               Name="VCWebDeploymentTool"/>
+                               Name="VCPostBuildEventTool"
+                       />
                </Configuration>
                <Configuration
                        Name="Release|Win32"
                        OutputDirectory=".\Release"
                        IntermediateDirectory=".\Release"
                        ConfigurationType="2"
+                       InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC70.vsprops"
                        UseOfMFC="0"
-                       ATLMinimizesCRunTimeLibraryUsage="FALSE"
-                       CharacterSet="2">
+                       ATLMinimizesCRunTimeLibraryUsage="false"
+                       CharacterSet="2"
+                       >
+                       <Tool
+                               Name="VCPreBuildEventTool"
+                       />
+                       <Tool
+                               Name="VCCustomBuildTool"
+                       />
+                       <Tool
+                               Name="VCXMLDataGeneratorTool"
+                       />
+                       <Tool
+                               Name="VCWebServiceProxyGeneratorTool"
+                       />
+                       <Tool
+                               Name="VCMIDLTool"
+                               PreprocessorDefinitions="NDEBUG"
+                               MkTypLibCompatible="true"
+                               SuppressStartupBanner="true"
+                               TargetEnvironment="1"
+                               TypeLibraryName=".\Release/mbfl.tlb"
+                       />
                        <Tool
                                Name="VCCLCompilerTool"
                                InlineFunctionExpansion="1"
                                AdditionalIncludeDirectories="mbfl,."
                                PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBMBFL_EXPORTS;HAVE_CONFIG_H"
-                               StringPooling="TRUE"
+                               StringPooling="true"
                                RuntimeLibrary="0"
-                               EnableFunctionLevelLinking="TRUE"
-                               UsePrecompiledHeader="2"
+                               EnableFunctionLevelLinking="true"
+                               UsePrecompiledHeader="0"
                                PrecompiledHeaderFile=".\Release/mbfl.pch"
                                AssemblerListingLocation=".\Release/"
                                ObjectFile=".\Release/"
                                ProgramDataBaseFileName=".\Release/"
                                WarningLevel="3"
-                               SuppressStartupBanner="TRUE"
-                               CompileAs="0"/>
+                               SuppressStartupBanner="true"
+                               CompileAs="0"
+                       />
+                       <Tool
+                               Name="VCManagedResourceCompilerTool"
+                       />
                        <Tool
-                               Name="VCCustomBuildTool"/>
+                               Name="VCResourceCompilerTool"
+                               PreprocessorDefinitions="NDEBUG"
+                               Culture="1033"
+                       />
+                       <Tool
+                               Name="VCPreLinkEventTool"
+                       />
                        <Tool
                                Name="VCLinkerTool"
                                AdditionalOptions="/MACHINE:I386"
                                AdditionalDependencies="odbc32.lib odbccp32.lib"
                                OutputFile=".\Release/mbfl.dll"
                                LinkIncremental="1"
-                               SuppressStartupBanner="TRUE"
+                               SuppressStartupBanner="true"
                                ModuleDefinitionFile=""
                                ProgramDatabaseFile=".\Release/mbfl.pdb"
-                               ImportLibrary=".\Release/mbfl.lib"/>
+                               RandomizedBaseAddress="1"
+                               DataExecutionPrevention="0"
+                               ImportLibrary=".\Release/mbfl.lib"
+                       />
                        <Tool
-                               Name="VCMIDLTool"
-                               PreprocessorDefinitions="NDEBUG"
-                               MkTypLibCompatible="TRUE"
-                               SuppressStartupBanner="TRUE"
-                               TargetEnvironment="1"
-                               TypeLibraryName=".\Release/mbfl.tlb"/>
+                               Name="VCALinkTool"
+                       />
                        <Tool
-                               Name="VCPostBuildEventTool"/>
+                               Name="VCManifestTool"
+                       />
                        <Tool
-                               Name="VCPreBuildEventTool"/>
+                               Name="VCXDCMakeTool"
+                       />
                        <Tool
-                               Name="VCPreLinkEventTool"/>
+                               Name="VCBscMakeTool"
+                       />
                        <Tool
-                               Name="VCResourceCompilerTool"
-                               PreprocessorDefinitions="NDEBUG"
-                               Culture="1033"/>
+                               Name="VCFxCopTool"
+                       />
                        <Tool
-                               Name="VCWebServiceProxyGeneratorTool"/>
+                               Name="VCAppVerifierTool"
+                       />
                        <Tool
-                               Name="VCWebDeploymentTool"/>
+                               Name="VCPostBuildEventTool"
+                       />
                </Configuration>
        </Configurations>
+       <References>
+       </References>
        <Files>
                <Filter
                        Name="Source Files"
-                       Filter="vc6">
+                       Filter="vc6"
+                       >
+                       <File
+                               RelativePath=".\filters\html_entities.c"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\mbfl\mbfilter.c"
+                               >
+                       </File>
                        <File
-                               RelativePath=".\filters\html_entities.c">
+                               RelativePath=".\filters\mbfilter_7bit.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter.c">
+                               RelativePath=".\mbfl\mbfilter_8bit.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_7bit.c">
+                               RelativePath=".\filters\mbfilter_ascii.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter_8bit.c">
+                               RelativePath=".\filters\mbfilter_base64.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_ascii.c">
+                               RelativePath=".\filters\mbfilter_big5.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_base64.c">
+                               RelativePath=".\filters\mbfilter_byte2.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_big5.c">
+                               RelativePath=".\filters\mbfilter_byte4.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_byte2.c">
+                               RelativePath=".\filters\mbfilter_cp1251.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_byte4.c">
+                               RelativePath=".\filters\mbfilter_cp1252.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp1251.c">
+                               RelativePath=".\filters\mbfilter_cp1254.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp1252.c">
+                               RelativePath=".\filters\mbfilter_cp866.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp866.c">
+                               RelativePath=".\filters\mbfilter_cp932.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp932.c">
+                               RelativePath=".\filters\mbfilter_cp936.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp936.c">
+                               RelativePath=".\filters\mbfilter_euc_cn.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_cn.c">
+                               RelativePath=".\filters\mbfilter_euc_jp.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_jp.c">
+                               RelativePath=".\filters\mbfilter_euc_jp_win.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_jp_win.c">
+                               RelativePath=".\filters\mbfilter_euc_kr.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_kr.c">
+                               RelativePath=".\filters\mbfilter_euc_tw.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_tw.c">
+                               RelativePath=".\filters\mbfilter_htmlent.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_htmlent.c">
+                               RelativePath=".\filters\mbfilter_hz.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_hz.c">
+                               RelativePath=".\filters\mbfilter_iso2022_kr.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso2022_kr.c">
+                               RelativePath=".\filters\mbfilter_iso8859_1.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_1.c">
+                               RelativePath=".\filters\mbfilter_iso8859_10.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_10.c">
+                               RelativePath=".\filters\mbfilter_iso8859_13.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_13.c">
+                               RelativePath=".\filters\mbfilter_iso8859_14.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_14.c">
+                               RelativePath=".\filters\mbfilter_iso8859_15.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_15.c">
+                               RelativePath=".\filters\mbfilter_iso8859_16.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_16.c">
+                               RelativePath=".\filters\mbfilter_iso8859_2.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_2.c">
+                               RelativePath=".\filters\mbfilter_iso8859_3.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_3.c">
+                               RelativePath=".\filters\mbfilter_iso8859_4.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_4.c">
+                               RelativePath=".\filters\mbfilter_iso8859_5.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_5.c">
+                               RelativePath=".\filters\mbfilter_iso8859_6.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_6.c">
+                               RelativePath=".\filters\mbfilter_iso8859_7.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_7.c">
+                               RelativePath=".\filters\mbfilter_iso8859_8.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_8.c">
+                               RelativePath=".\filters\mbfilter_iso8859_9.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_9.c">
+                               RelativePath=".\filters\mbfilter_jis.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_jis.c">
+                               RelativePath=".\filters\mbfilter_koi8r.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_koi8r.c">
+                               RelativePath=".\filters\mbfilter_koi8u.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter_pass.c">
+                               RelativePath=".\mbfl\mbfilter_pass.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_qprint.c">
+                               RelativePath=".\filters\mbfilter_qprint.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_sjis.c">
+                               RelativePath=".\filters\mbfilter_sjis.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_ucs2.c">
+                               RelativePath=".\filters\mbfilter_ucs2.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_ucs4.c">
+                               RelativePath=".\filters\mbfilter_ucs4.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_uhc.c">
+                               RelativePath=".\filters\mbfilter_uhc.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf16.c">
+                               RelativePath=".\filters\mbfilter_utf16.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf32.c">
+                               RelativePath=".\filters\mbfilter_utf32.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf7.c">
+                               RelativePath=".\filters\mbfilter_utf7.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf7imap.c">
+                               RelativePath=".\filters\mbfilter_utf7imap.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf8.c">
+                               RelativePath=".\filters\mbfilter_utf8.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_uuencode.c">
+                               RelativePath=".\filters\mbfilter_uuencode.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter_wchar.c">
+                               RelativePath=".\mbfl\mbfilter_wchar.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_allocators.c">
+                               RelativePath=".\mbfl\mbfl_allocators.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_convert.c">
+                               RelativePath=".\mbfl\mbfl_convert.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_encoding.c">
+                               RelativePath=".\mbfl\mbfl_encoding.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_filter_output.c">
+                               RelativePath=".\mbfl\mbfl_filter_output.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_ident.c">
+                               RelativePath=".\mbfl\mbfl_ident.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_language.c">
+                               RelativePath=".\mbfl\mbfl_language.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_memory_device.c">
+                               RelativePath=".\mbfl\mbfl_memory_device.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_string.c">
+                               RelativePath=".\mbfl\mbfl_string.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_de.c">
+                               RelativePath=".\nls\nls_de.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_en.c">
+                               RelativePath=".\nls\nls_en.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_ja.c">
+                               RelativePath=".\nls\nls_ja.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_kr.c">
+                               RelativePath=".\nls\nls_kr.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_neutral.c">
+                               RelativePath=".\nls\nls_neutral.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_ru.c">
+                               RelativePath=".\nls\nls_ru.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_uni.c">
+                               RelativePath=".\nls\nls_uni.c"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_zh.c">
+                               RelativePath=".\nls\nls_zh.c"
+                               >
                        </File>
                </Filter>
                <Filter
                        Name="Header Files"
-                       Filter="h;hpp;hxx;hm;inl">
+                       Filter="h;hpp;hxx;hm;inl"
+                       >
                        <File
-                               RelativePath=".\config.h.vc6">
+                               RelativePath=".\config.h.vc6"
+                               >
                                <FileConfiguration
-                                       Name="Debug|Win32">
+                                       Name="Debug|Win32"
+                                       >
                                        <Tool
                                                Name="VCCustomBuildTool"
-                                               CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;
-"
-                                               Outputs="$(InputDir)\config.h"/>
+                                               CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;&#x0D;&#x0A;"
+                                               Outputs="$(InputDir)\config.h"
+                                       />
                                </FileConfiguration>
                                <FileConfiguration
-                                       Name="Release|Win32">
+                                       Name="Release|Win32"
+                                       >
                                        <Tool
                                                Name="VCCustomBuildTool"
-                                               CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;
-"
-                                               Outputs="$(InputDir)\config.h"/>
+                                               CommandLine="copy $(InputDir)\config.h.vc6 &quot;$(InputDir)\config.h&quot;&#x0D;&#x0A;"
+                                               Outputs="$(InputDir)\config.h"
+                                       />
                                </FileConfiguration>
                        </File>
                        <File
-                               RelativePath=".\filters\cp932_table.h">
+                               RelativePath=".\filters\cp932_table.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\html_entities.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\mbfl\mbfilter.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_7bit.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\mbfl\mbfilter_8bit.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_armscii8.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_ascii.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_base64.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_big5.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_byte2.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_byte4.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_cp1251.h"
+                               >
+                       </File>
+                       <File
+                               RelativePath=".\filters\mbfilter_cp1252.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\html_entities.h">
+                               RelativePath=".\filters\mbfilter_cp1254.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter.h">
+                               RelativePath=".\filters\mbfilter_cp5022x.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_7bit.h">
+                               RelativePath=".\filters\mbfilter_cp51932.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter_8bit.h">
+                               RelativePath=".\filters\mbfilter_cp866.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_ascii.h">
+                               RelativePath=".\filters\mbfilter_cp932.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_base64.h">
+                               RelativePath=".\filters\mbfilter_cp936.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_big5.h">
+                               RelativePath=".\filters\mbfilter_euc_cn.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_byte2.h">
+                               RelativePath=".\filters\mbfilter_euc_jp.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_byte4.h">
+                               RelativePath=".\filters\mbfilter_euc_jp_win.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp1251.h">
+                               RelativePath=".\filters\mbfilter_euc_kr.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp1252.h">
+                               RelativePath=".\filters\mbfilter_euc_tw.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp866.h">
+                               RelativePath=".\filters\mbfilter_htmlent.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp932.h">
+                               RelativePath=".\filters\mbfilter_hz.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_cp936.h">
+                               RelativePath=".\filters\mbfilter_iso2022_kr.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_cn.h">
+                               RelativePath=".\filters\mbfilter_iso8859_1.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_jp.h">
+                               RelativePath=".\filters\mbfilter_iso8859_10.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_jp_win.h">
+                               RelativePath=".\filters\mbfilter_iso8859_13.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_kr.h">
+                               RelativePath=".\filters\mbfilter_iso8859_14.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_euc_tw.h">
+                               RelativePath=".\filters\mbfilter_iso8859_15.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_htmlent.h">
+                               RelativePath=".\filters\mbfilter_iso8859_16.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_hz.h">
+                               RelativePath=".\filters\mbfilter_iso8859_2.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso2022_kr.h">
+                               RelativePath=".\filters\mbfilter_iso8859_3.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_1.h">
+                               RelativePath=".\filters\mbfilter_iso8859_4.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_10.h">
+                               RelativePath=".\filters\mbfilter_iso8859_5.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_13.h">
+                               RelativePath=".\filters\mbfilter_iso8859_6.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_14.h">
+                               RelativePath=".\filters\mbfilter_iso8859_7.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_15.h">
+                               RelativePath=".\filters\mbfilter_iso8859_8.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_16.h">
+                               RelativePath=".\filters\mbfilter_iso8859_9.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_2.h">
+                               RelativePath=".\filters\mbfilter_jis.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_3.h">
+                               RelativePath=".\filters\mbfilter_koi8r.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_4.h">
+                               RelativePath=".\filters\mbfilter_koi8u.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_5.h">
+                               RelativePath=".\mbfl\mbfilter_pass.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_6.h">
+                               RelativePath=".\filters\mbfilter_qprint.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_7.h">
+                               RelativePath=".\filters\mbfilter_sjis.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_8.h">
+                               RelativePath=".\filters\mbfilter_tl_jisx0201_jisx0208.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_iso8859_9.h">
+                               RelativePath=".\filters\mbfilter_ucs2.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_jis.h">
+                               RelativePath=".\filters\mbfilter_ucs4.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_koi8r.h">
+                               RelativePath=".\filters\mbfilter_uhc.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter_pass.h">
+                               RelativePath=".\filters\mbfilter_utf16.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_qprint.h">
+                               RelativePath=".\filters\mbfilter_utf32.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_sjis.h">
+                               RelativePath=".\filters\mbfilter_utf7.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_ucs2.h">
+                               RelativePath=".\filters\mbfilter_utf7imap.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_ucs4.h">
+                               RelativePath=".\filters\mbfilter_utf8.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_uhc.h">
+                               RelativePath=".\filters\mbfilter_uuencode.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf16.h">
+                               RelativePath=".\mbfl\mbfilter_wchar.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf32.h">
+                               RelativePath=".\mbfl\mbfl_allocators.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf7.h">
+                               RelativePath=".\mbfl\mbfl_consts.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf7imap.h">
+                               RelativePath=".\mbfl\mbfl_convert.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_utf8.h">
+                               RelativePath=".\mbfl\mbfl_encoding.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\mbfilter_uuencode.h">
+                               RelativePath=".\mbfl\mbfl_filter_output.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfilter_wchar.h">
+                               RelativePath=".\mbfl\mbfl_ident.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_allocators.h">
+                               RelativePath=".\mbfl\mbfl_language.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_consts.h">
+                               RelativePath=".\mbfl\mbfl_memory_device.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_convert.h">
+                               RelativePath=".\mbfl\mbfl_string.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_encoding.h">
+                               RelativePath=".\nls\nls_de.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_filter_output.h">
+                               RelativePath=".\nls\nls_en.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_ident.h">
+                               RelativePath=".\nls\nls_hy.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_language.h">
+                               RelativePath=".\nls\nls_ja.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_memory_device.h">
+                               RelativePath=".\nls\nls_kr.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\mbfl\mbfl_string.h">
+                               RelativePath=".\nls\nls_neutral.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_de.h">
+                               RelativePath=".\nls\nls_ru.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_en.h">
+                               RelativePath=".\nls\nls_tr.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_ja.h">
+                               RelativePath=".\nls\nls_ua.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_kr.h">
+                               RelativePath=".\nls\nls_uni.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_neutral.h">
+                               RelativePath=".\nls\nls_zh.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_ru.h">
+                               RelativePath=".\filters\unicode_prop.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_uni.h">
+                               RelativePath=".\filters\unicode_table_armscii8.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\nls\nls_zh.h">
+                               RelativePath=".\filters\unicode_table_big5.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_prop.h">
+                               RelativePath=".\filters\unicode_table_cns11643.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_big5.h">
+                               RelativePath=".\filters\unicode_table_cp1251.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_cns11643.h">
+                               RelativePath=".\filters\unicode_table_cp1252.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_cp1251.h">
+                               RelativePath=".\filters\unicode_table_cp1254.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_cp1252.h">
+                               RelativePath=".\filters\unicode_table_cp866.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_cp866.h">
+                               RelativePath=".\filters\unicode_table_cp932_ext.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_cp932_ext.h">
+                               RelativePath=".\filters\unicode_table_cp936.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_cp936.h">
+                               RelativePath=".\filters\unicode_table_iso8859_10.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_10.h">
+                               RelativePath=".\filters\unicode_table_iso8859_13.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_13.h">
+                               RelativePath=".\filters\unicode_table_iso8859_14.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_14.h">
+                               RelativePath=".\filters\unicode_table_iso8859_15.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_15.h">
+                               RelativePath=".\filters\unicode_table_iso8859_16.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_16.h">
+                               RelativePath=".\filters\unicode_table_iso8859_2.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_2.h">
+                               RelativePath=".\filters\unicode_table_iso8859_3.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_3.h">
+                               RelativePath=".\filters\unicode_table_iso8859_4.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_4.h">
+                               RelativePath=".\filters\unicode_table_iso8859_5.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_5.h">
+                               RelativePath=".\filters\unicode_table_iso8859_6.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_6.h">
+                               RelativePath=".\filters\unicode_table_iso8859_7.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_7.h">
+                               RelativePath=".\filters\unicode_table_iso8859_8.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_8.h">
+                               RelativePath=".\filters\unicode_table_iso8859_9.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_iso8859_9.h">
+                               RelativePath=".\filters\unicode_table_jis.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_jis.h">
+                               RelativePath=".\filters\unicode_table_koi8r.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_koi8r.h">
+                               RelativePath=".\filters\unicode_table_koi8u.h"
+                               >
                        </File>
                        <File
-                               RelativePath=".\filters\unicode_table_uhc.h">
+                               RelativePath=".\filters\unicode_table_uhc.h"
+                               >
                        </File>
                </Filter>
                <Filter
                        Name="Resource Files"
-                       Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe">
+                       Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+                       >
                        <File
-                               RelativePath=".\mbfl.rc">
+                               RelativePath=".\mbfl.rc"
+                               >
                        </File>
                </Filter>
+               <File
+                       RelativePath=".\filters\mbfilter_armscii8.c"
+                       >
+               </File>
+               <File
+                       RelativePath=".\filters\mbfilter_cp5022x.c"
+                       >
+               </File>
+               <File
+                       RelativePath=".\filters\mbfilter_cp51932.c"
+                       >
+               </File>
+               <File
+                       RelativePath=".\filters\mbfilter_tl_jisx0201_jisx0208.c"
+                       >
+               </File>
+               <File
+                       RelativePath=".\nls\nls_hy.c"
+                       >
+               </File>
+               <File
+                       RelativePath=".\nls\nls_tr.c"
+                       >
+               </File>
+               <File
+                       RelativePath=".\nls\nls_ua.c"
+                       >
+               </File>
        </Files>
        <Globals>
        </Globals>
index 655e61893ab65c34f484968bdc4068dd16388f04..9d6a0c78e51c4fe0fb8c35b17c694af462908dde 100644 (file)
@@ -1,7 +1,7 @@
 /* $Id$ */
 1 VERSIONINFO
-FILEVERSION 1,1,0,0 
-PRODUCTVERSION 1,1,0,0 
+FILEVERSION 1,0,2,0 
+PRODUCTVERSION 1,0,2,0 
 FILEFLAGSMASK 0
 FILEOS 0x40000
 FILETYPE 1
@@ -12,12 +12,12 @@ FILETYPE 1
     {
       VALUE "CompanyName", "-\0"
       VALUE "FileDescription", "streamable kanji code filter\0"
-      VALUE "FileVersion", "1.1.0\0"
+      VALUE "FileVersion", "1.0.2\0"
       VALUE "InternalName", "mbfl\0"
       VALUE "LegalCopyright", "GNU Lesser Public License Version 2.0\0"
       VALUE "OriginalFilename", "mbfl.dll\0"
       VALUE "ProductName", "mbfl\0"
-      VALUE "ProductVersion", "1.1.0\0"
+      VALUE "ProductVersion", "1.0.2\0"
     }
   }
 }
index 25d67345416806c0c6e3b63397db498e738f51f9..6e662d14e50d4055f5db638f1d68d368ed296b30 100644 (file)
@@ -1,12 +1,37 @@
 EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk
 lib_LTLIBRARIES=libmbfl.la
-libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h
+libmbfl_la_SOURCES=mbfilter.c \
+       mbfl_string.c \
+       mbfl_language.c \
+       mbfl_encoding.c \
+       mbfl_convert.c \
+       mbfl_ident.c \
+       mbfl_memory_device.c \
+       mbfl_allocators.c \
+       mbfl_filter_output.c \
+       mbfilter_pass.c \
+       mbfilter_wchar.c \
+       mbfilter_8bit.c \
+       eaw_table.h
 libmbfl_filters_la=../filters/libmbfl_filters.la
 libmbfl_nls_la=../nls/libmbfl_nls.la
 libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la)
 libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION)
 libmbfl_includedir=$(includedir)/mbfl
-libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h 
+libmbfl_include_HEADERS=mbfilter.h \
+       mbfl_consts.h \
+       mbfl_encoding.h \
+       mbfl_language.h \
+       mbfl_string.h \
+       mbfl_convert.h \
+       mbfl_ident.h \
+       mbfl_memory_device.h \
+       mbfl_allocators.h \
+       mbfl_defs.h \
+       mbfl_filter_output.h \
+       mbfilter_pass.h \
+       mbfilter_wchar.h \
+       mbfilter_8bit.h
 
 mbfilter.c: eaw_table.h
 
index 5d43e6a6f46957c97164894620340db403bb4c24..1b43a49efecc670647864f4cb3dfc54be35c1379 100644 (file)
@@ -1,5 +1,16 @@
 !include ..\rules.mak.bcc32
-OBJS=mbfilter.obj mbfilter_8bit.obj mbfilter_pass.obj mbfilter_wchar.obj mbfl_allocators.obj mbfl_convert.obj mbfl_encoding.obj mbfl_filter_output.obj mbfl_ident.obj mbfl_language.obj mbfl_memory_device.obj mbfl_string.obj
+OBJS=mbfilter.obj \
+       mbfilter_8bit.obj \
+       mbfilter_pass.obj \
+       mbfilter_wchar.obj \
+       mbfl_allocators.obj \
+       mbfl_convert.obj \
+       mbfl_encoding.obj \
+       mbfl_filter_output.obj \
+       mbfl_ident.obj \
+       mbfl_language.obj \
+       mbfl_memory_device.obj \
+       mbfl_string.obj
 
 all: $(OBJS)
 
index 73b7229c223ad6b4b66db170cc56b0bb0e83057e..a00c51b5f4b1a170791acf5a89fc79f6782e5098 100644 (file)
 #include "mbfl_convert.h"
 #include "mbfl_ident.h"
 
+/*
+ * version information
+ */
+#define MBFL_VERSION_MAJOR 1
+#define MBFL_VERSION_MINOR 0
+#define MBFL_VERSION_TEENY 2
+
 /*
  * convert filter
  */
index a87c5646162ce97f0f3ac14ca50b34051043e385..4fc89226059e3b8c82f6f9257c57bb8e59fc733e 100644 (file)
@@ -34,6 +34,6 @@
 #include "mbfl_defs.h"
 #include "mbfilter.h"
 
-MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit;
+extern const mbfl_encoding mbfl_encoding_8bit;
 
 #endif /* MBFL_MBFILTER_8BIT_H */
index 49d169c66899d6a86b3b5861b1abfc65ec5e0abf..087aa2c3be5aaab6251db2770e42544197abbb49 100644 (file)
@@ -33,8 +33,8 @@
 #include "mbfl_defs.h"
 #include "mbfilter.h"
 
-MBFLAPI extern const mbfl_encoding mbfl_encoding_pass; 
-MBFLAPI extern const struct mbfl_convert_vtbl vtbl_pass;
+extern const mbfl_encoding mbfl_encoding_pass; 
+extern const struct mbfl_convert_vtbl vtbl_pass;
 
 MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter);
 
index 9e9396a77f716662586a7097cc0247c1a557538c..24bf7473c134f1f393db30fef84fea0a3266a46f 100644 (file)
@@ -34,6 +34,6 @@
 #include "mbfl_defs.h"
 #include "mbfilter.h"
 
-MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar;
+extern const mbfl_encoding mbfl_encoding_wchar;
 
 #endif /* MBFL_MBFILTER_WCHAR_H */
index 17e00dd5952140364965ab511facc39be8c51a9e..725a674b390ac960cd6af3db88e12d24d3983ae7 100644 (file)
 #include "filters/mbfilter_sjis.h"
 #include "filters/mbfilter_cp51932.h"
 #include "filters/mbfilter_jis.h"
+#include "filters/mbfilter_iso2022_jp_ms.h"
 #include "filters/mbfilter_euc_jp.h"
 #include "filters/mbfilter_euc_jp_win.h"
 #include "filters/mbfilter_ascii.h"
 #include "filters/mbfilter_koi8r.h"
+#include "filters/mbfilter_koi8u.h"
 #include "filters/mbfilter_cp866.h"
 #include "filters/mbfilter_cp932.h"
 #include "filters/mbfilter_cp936.h"
 #include "filters/mbfilter_cp1251.h"
 #include "filters/mbfilter_cp1252.h"
+#include "filters/mbfilter_cp1254.h"
 #include "filters/mbfilter_iso8859_1.h"
 #include "filters/mbfilter_iso8859_2.h"
 #include "filters/mbfilter_iso8859_3.h"
@@ -140,8 +143,12 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
        &vtbl_wchar_cp866,
        &vtbl_koi8r_wchar,
        &vtbl_wchar_koi8r,
+       &vtbl_koi8u_wchar,
+       &vtbl_wchar_koi8u,
        &vtbl_cp1252_wchar,
        &vtbl_wchar_cp1252,
+       &vtbl_cp1254_wchar,
+       &vtbl_wchar_cp1254,
        &vtbl_ascii_wchar,
        &vtbl_wchar_ascii,
        &vtbl_8859_1_wchar,
index 2e5b4abaf80711c9fd2cd11cd709c5d2d652d8e9..76956f05305270a03b33c5836f97a7661a4bd336 100644 (file)
 #include "filters/mbfilter_sjis.h"
 #include "filters/mbfilter_cp51932.h"
 #include "filters/mbfilter_jis.h"
+#include "filters/mbfilter_iso2022_jp_ms.h"
 #include "filters/mbfilter_euc_jp.h"
 #include "filters/mbfilter_euc_jp_win.h"
 #include "filters/mbfilter_ascii.h"
 #include "filters/mbfilter_koi8r.h"
+#include "filters/mbfilter_koi8u.h"
 #include "filters/mbfilter_cp866.h"
 #include "filters/mbfilter_cp932.h"
 #include "filters/mbfilter_cp936.h"
 #include "filters/mbfilter_cp1251.h"
 #include "filters/mbfilter_cp1252.h"
+#include "filters/mbfilter_cp1254.h"
 #include "filters/mbfilter_iso8859_1.h"
 #include "filters/mbfilter_iso8859_2.h"
 #include "filters/mbfilter_iso8859_3.h"
@@ -156,6 +159,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
        &mbfl_encoding_2022jp,
        &mbfl_encoding_2022jpms,
        &mbfl_encoding_cp1252,
+       &mbfl_encoding_cp1254,
        &mbfl_encoding_8859_1,
        &mbfl_encoding_8859_2,
        &mbfl_encoding_8859_3,
@@ -181,6 +185,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
        &mbfl_encoding_cp1251,
        &mbfl_encoding_cp866,
        &mbfl_encoding_koi8r,
+       &mbfl_encoding_koi8u,
        &mbfl_encoding_armscii8,
        &mbfl_encoding_cp850,
        NULL
@@ -197,16 +202,16 @@ mbfl_name2encoding(const char *name)
                return NULL;
        }
 
-       i = 0;
-       while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
+       i = 0;
+       while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
                if (strcasecmp(encoding->name, name) == 0) {
                        return encoding;
                }
        }
 
-       /* serch MIME charset name */
-       i = 0;
-       while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
+       /* search MIME charset name */
+       i = 0;
+       while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
                if (encoding->mime_name != NULL) {
                        if (strcasecmp(encoding->mime_name, name) == 0) {
                                return encoding;
@@ -214,12 +219,12 @@ mbfl_name2encoding(const char *name)
                }
        }
 
-       /* serch aliases */
-       i = 0;
-       while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
+       /* search aliases */
+       i = 0;
+       while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
                if (encoding->aliases != NULL) {
-                       j = 0;
-                       while ((*encoding->aliases)[j] != NULL) {
+                       j = 0;
+                       while ((*encoding->aliases)[j] != NULL) {
                                if (strcasecmp((*encoding->aliases)[j], name) == 0) {
                                        return encoding;
                                }
index 351a21717082a8eb6756a73ec9b06df12463b6ef..2599e1107e75f8017dc983865c5ba80d4fa23236 100644 (file)
@@ -75,6 +75,7 @@ enum mbfl_no_encoding {
        mbfl_no_encoding_2022jp,
        mbfl_no_encoding_2022jpms,
        mbfl_no_encoding_cp1252,
+       mbfl_no_encoding_cp1254,
        mbfl_no_encoding_8859_1,
        mbfl_no_encoding_8859_2,
        mbfl_no_encoding_8859_3,
@@ -99,6 +100,7 @@ enum mbfl_no_encoding {
        mbfl_no_encoding_cp1251,
        mbfl_no_encoding_cp866,
        mbfl_no_encoding_koi8r,
+       mbfl_no_encoding_koi8u,
        mbfl_no_encoding_8859_16,
        mbfl_no_encoding_armscii8,
        mbfl_no_encoding_cp850,
index ade0f2a1ab71da83bb59b2a7fbd03e331ba1026c..4f3bd5c58dfddf59c6a3b24dc90f8f0b4d787d1e 100644 (file)
 #include "filters/mbfilter_iso2022_kr.h"
 #include "filters/mbfilter_sjis.h"
 #include "filters/mbfilter_jis.h"
+#include "filters/mbfilter_iso2022_jp_ms.h"
 #include "filters/mbfilter_euc_jp.h"
 #include "filters/mbfilter_euc_jp_win.h"
 #include "filters/mbfilter_ascii.h"
 #include "filters/mbfilter_koi8r.h"
+#include "filters/mbfilter_koi8u.h"
 #include "filters/mbfilter_cp866.h"
 #include "filters/mbfilter_cp932.h"
 #include "filters/mbfilter_cp936.h"
 #include "filters/mbfilter_cp1251.h"
 #include "filters/mbfilter_cp1252.h"
+#include "filters/mbfilter_cp1254.h"
+#include "filters/mbfilter_cp51932.h"
 #include "filters/mbfilter_iso8859_1.h"
 #include "filters/mbfilter_iso8859_2.h"
 #include "filters/mbfilter_iso8859_3.h"
@@ -108,6 +112,7 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
        &vtbl_identify_jis,
        &vtbl_identify_2022jp,
        &vtbl_identify_2022jpms,
+       &vtbl_identify_cp51932,
        &vtbl_identify_euccn,
        &vtbl_identify_cp936,
        &vtbl_identify_hz,
@@ -119,7 +124,9 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
        &vtbl_identify_cp1251,
        &vtbl_identify_cp866,
        &vtbl_identify_koi8r,
+       &vtbl_identify_koi8u,
        &vtbl_identify_cp1252,
+       &vtbl_identify_cp1254,
        &vtbl_identify_8859_1,
        &vtbl_identify_8859_2,
        &vtbl_identify_8859_3,
index aaeebbc8e80754d1d2fc8bd4272e0d649ce4f83a..4dd97263629ac066f918c6804ef8bbe5e3818937 100644 (file)
@@ -57,6 +57,7 @@
 #include "nls/nls_uni.h"
 #include "nls/nls_de.h"
 #include "nls/nls_ru.h"
+#include "nls/nls_ua.h"
 #include "nls/nls_en.h"
 #include "nls/nls_hy.h"
 #include "nls/nls_tr.h"
@@ -77,6 +78,7 @@ static const mbfl_language *mbfl_language_ptr_table[] = {
        &mbfl_language_english,
        &mbfl_language_german,
        &mbfl_language_russian,
+       &mbfl_language_ukrainian,
        &mbfl_language_armenian,
        &mbfl_language_turkish,
        &mbfl_language_neutral,
index caf1d80940b21c2fcd41e263ce594ae9caa16f62..af42a010cfe96e4f6aa02d707aaeed787d216d4f 100644 (file)
@@ -57,6 +57,7 @@ enum mbfl_no_language {
        mbfl_no_language_simplified_chinese,            /* zh-cn */
        mbfl_no_language_traditional_chinese,           /* zh-tw */
        mbfl_no_language_russian,               /* ru */
+       mbfl_no_language_ukrainian,             /* ua */
        mbfl_no_language_armenian,              /* hy */
        mbfl_no_language_turkish,               /* tr */
        mbfl_no_language_max
index ca81f902c80b7249ceab6220fdb1ebb0be2aa58f..454a07c6384e6360c170a8eb6dc5d77f917c2edd 100644 (file)
@@ -2,4 +2,25 @@ EXTRA_DIST=Makefile.bcc32
 noinst_LTLIBRARIES=libmbfl_nls.la
 INCLUDES=-I../mbfl
 libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h
+libmbfl_nls_la_SOURCES=nls_ja.c \
+       nls_de.c \
+       nls_en.c \
+       nls_hy.c \
+       nls_tr.c \
+       nls_kr.c \
+       nls_ru.c \
+       nls_ua.c \
+       nls_zh.c \
+       nls_uni.c \
+       nls_neutral.c \
+       nls_ja.h \
+       nls_de.h \
+       nls_en.h \
+       nls_hy.h \
+       nls_tr.h \
+       nls_kr.h \
+       nls_ru.h \
+       nls_ua.h \
+       nls_zh.h \
+       nls_uni.h \
+       nls_neutral.h
index 444e88c52b6b82c124559d071e949f0b55a7674d..dea8689cb1535428f01b701f13e01578ff597d55 100644 (file)
@@ -1,6 +1,16 @@
 !include ..\rules.mak.bcc32
 INCLUDES=$(INCLUDES) -I..\mbfl
-OBJS=nls_ja.obj nls_de.obj nls_en.obj nls_hy.obj nls_kr.obj nls_ru.obj nls_zh.obj nls_uni.obj nls_neutral.obj
+OBJS=nls_ja.obj \
+       nls_de.obj \
+       nls_en.obj \
+       nls_hy.obj \
+       nls_tr.obj \
+       nls_kr.obj \
+       nls_ru.obj \
+       nls_ua.obj \
+       nls_zh.obj \
+       nls_uni.obj \
+       nls_neutral.obj
 
 all: $(OBJS)
 
diff --git a/ext/mbstring/libmbfl/nls/nls_ua.c b/ext/mbstring/libmbfl/nls/nls_ua.c
new file mode 100644 (file)
index 0000000..85fe9b4
--- /dev/null
@@ -0,0 +1,22 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+
+#include "mbfilter.h"
+#include "nls_ua.h"
+
+const mbfl_language mbfl_language_ukrainian = {
+       mbfl_no_language_ukrainian,
+       "Ukrainian",
+       "ua",
+       NULL,
+       mbfl_no_encoding_koi8u,
+       mbfl_no_encoding_qprint,
+       mbfl_no_encoding_8bit
+};
diff --git a/ext/mbstring/libmbfl/nls/nls_ua.h b/ext/mbstring/libmbfl/nls/nls_ua.h
new file mode 100644 (file)
index 0000000..0efa8d1
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef MBFL_NLS_UA_H
+#define MBFL_NLS_UA_H
+
+#include "mbfilter.h"
+
+/* Ukrainian language descriptor, defined in nls_ua.c */
+extern const mbfl_language mbfl_language_ukrainian;
+
+#endif /* MBFL_NLS_UA_H */
diff --git a/ext/mbstring/libmbfl/tests/Makefile.am b/ext/mbstring/libmbfl/tests/Makefile.am
new file mode 100644 (file)
index 0000000..8e857bc
--- /dev/null
@@ -0,0 +1,10 @@
+SUBDIRS=conv_encoding.tests conv_kana.tests strwidth.tests strcut.tests
+noinst_PROGRAMS=conv_encoding conv_kana strwidth strcut
+conv_encoding_SOURCES=conv_encoding.c
+conv_encoding_LDADD=../mbfl/libmbfl.la
+conv_kana_SOURCES=conv_kana.c
+conv_kana_LDADD=../mbfl/libmbfl.la
+strwidth_SOURCES=strwidth.c
+strwidth_LDADD=../mbfl/libmbfl.la
+strcut_SOURCES=strcut.c
+strcut_LDADD=../mbfl/libmbfl.la
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.c b/ext/mbstring/libmbfl/tests/conv_encoding.c
new file mode 100644 (file)
index 0000000..9769964
--- /dev/null
@@ -0,0 +1,104 @@
+/**
+ * Test driver for libmbfl: converts each line read from stdin from one encoding to another and prints the result as a hex dump.
+ * Rui Hirokawa <hirokawa@php.net>
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+static void hexdump(const mbfl_string *ptr)
+{
+       unsigned int i;
+
+       for (i = 0; i < ptr->len; i++) {
+               printf("%%%02x", ptr->val[i]);
+       }
+
+       printf(" (%u)\n", ptr->len);
+}
+
+int main(int argc, char **argv)
+{
+       enum mbfl_no_encoding from_encoding, to_encoding;
+       enum mbfl_no_language no_language;
+       mbfl_buffer_converter *convd = NULL;
+       mbfl_memory_device dev;
+       mbfl_string string, result, *ret;
+       int final = 0;
+       int state = 0;
+
+       if (argc < 4) {
+               fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]);
+               return EXIT_FAILURE;
+       }
+
+       if ((no_language = mbfl_name2no_language(argv[1])) ==
+                       mbfl_no_language_invalid) {
+               printf("Unsupported NLS: %s\n", argv[1]);
+               return EXIT_FAILURE;
+       }
+
+       if ((to_encoding = mbfl_name2no_encoding(argv[2])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[2]);
+               return EXIT_FAILURE;
+       }
+
+       if ((from_encoding = mbfl_name2no_encoding(argv[3])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[3]);
+               return EXIT_FAILURE;
+       }
+
+       convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+
+       do {
+               mbfl_memory_device_init(&dev, 0, 4096);
+               mbfl_string_init_set(&string, no_language, from_encoding);
+
+               for (;;) {
+                       const int c = fgetc(stdin);
+
+                       if (c == EOF) {
+                               final = 1;
+                               break;
+                       } else if (c == 10) {
+                               if (state == 1) { /* LF completing a CR LF pair: skip it */
+                                       state = 0;
+                                       continue;
+                               }
+                               break;
+                       } else if (c == 13) {
+                               state = 1;
+                               break;
+                       }
+                       state = 0; /* data after a bare CR: the next LF terminates a line again */
+                       if (dev.pos >= dev.length) {
+                               if (dev.length + dev.allocsz < dev.length) {
+                                       printf("Unable to allocate memory\n");
+                                       return EXIT_FAILURE;
+                               }
+
+                               mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+                                               dev.allocsz);
+                       }
+
+                       dev.buffer[dev.pos++] = (unsigned char)c;
+               }
+
+               mbfl_memory_device_result(&dev, &string);
+               mbfl_string_init_set(&result, no_language, to_encoding);
+               ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
+               hexdump(&result);
+               mbfl_string_clear(&result);
+               mbfl_string_clear(&string);
+       } while (!final);
+
+       mbfl_buffer_converter_delete(convd);    
+
+       return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am
new file mode 100644 (file)
index 0000000..37713c3
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp
new file mode 100644 (file)
index 0000000..0e63ef1
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese CP50220raw eucJP-win
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "81 - 87ku"
+send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r"
+expect {
+    "%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+    "%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    "%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test }
+}
+
+set test "NEC special character (row 13)"
+send "­Î"
+expect {
+    "%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test }
+}
+
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp
new file mode 100644 (file)
index 0000000..882953f
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS EUC-JP
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+    "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+    "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "character not representable in Shift_JIS"
+send "­Î"
+expect {
+    "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp
new file mode 100644 (file)
index 0000000..e51b5e4
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS UTF-8
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+    "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "日本語テスト\r"
+expect {
+    "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "０１２３４５６７８９\r"
+expect {
+    "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "character not representable in Shift_JIS"
+send "㍊"
+expect {
+    "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/conv_kana.c b/ext/mbstring/libmbfl/tests/conv_kana.c
new file mode 100644 (file)
index 0000000..c12fdc2
--- /dev/null
@@ -0,0 +1,147 @@
+/**
+ * Test driver for libmbfl: applies mbfl_ja_jp_hantozen() to each line read from stdin and prints the result as a hex dump.
+ * Rui Hirokawa <hirokawa@php.net>
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+static void hexdump(const mbfl_string *ptr)
+{
+       unsigned int i;
+
+       for (i = 0; i < ptr->len; i++) {
+               printf("%%%02x", ptr->val[i]);
+       }
+
+       printf(" (%u)\n", ptr->len);
+}
+
+int main(int argc, char **argv)
+{
+       enum mbfl_no_encoding no_enc;
+       const enum mbfl_no_language no_lang = mbfl_no_language_japanese;
+       mbfl_memory_device dev;
+       mbfl_string string, result;
+       int final = 0;
+       int state = 0;
+       int mode = 0;
+
+       if (argc < 3) {
+               fprintf(stderr, "Usage: %s encoding flags\n", argv[0]);
+               return EXIT_FAILURE;
+       }
+
+       if ((no_enc = mbfl_name2no_encoding(argv[1])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[1]);
+               return EXIT_FAILURE;
+       }
+
+       {
+               const char *p;
+
+               for (p= argv[2] + strlen(argv[2]); p > argv[2]; ) {
+                       switch (*(--p)) {
+                       case 'A':
+                               mode |= 0x1;
+                               break;
+                       case 'a':
+                               mode |= 0x10;
+                               break;
+                       case 'R':
+                               mode |= 0x2;
+                               break;
+                       case 'r':
+                               mode |= 0x20;
+                               break;
+                       case 'N':
+                               mode |= 0x4;
+                               break;
+                       case 'n':
+                               mode |= 0x40;
+                               break;
+                       case 'S':
+                               mode |= 0x8;
+                               break;
+                       case 's':
+                               mode |= 0x80;
+                               break;
+                       case 'K':
+                               mode |= 0x100;
+                               break;
+                       case 'k':
+                               mode |= 0x1000;
+                               break;
+                       case 'H':
+                               mode |= 0x200;
+                               break;
+                       case 'h':
+                               mode |= 0x2000;
+                               break;
+                       case 'V':
+                               mode |= 0x800;
+                               break;
+                       case 'C':
+                               mode |= 0x10000;
+                               break;
+                       case 'c':
+                               mode |= 0x20000;
+                               break;
+                       case 'M':
+                               mode |= 0x100000;
+                               break;
+                       case 'm':
+                               mode |= 0x200000;
+                               break;
+                       }
+               }
+       }
+
+       do {
+               mbfl_memory_device_init(&dev, 0, 4096);
+               mbfl_string_init_set(&string, no_lang, no_enc);
+
+               for (;;) {
+                       const int c = fgetc(stdin);
+
+                       if (c == EOF) {
+                               final = 1;
+                               break;
+                       } else if (c == 10) {
+                               if (state == 1) { /* LF completing a CR LF pair: skip it */
+                                       state = 0;
+                                       continue;
+                               }
+                               break;
+                       } else if (c == 13) {
+                               state = 1;
+                               break;
+                       }
+                       state = 0; /* data after a bare CR: the next LF terminates a line again */
+                       if (dev.pos >= dev.length) {
+                               if (dev.length + dev.allocsz < dev.length) {
+                                       printf("Unable to allocate memory\n");
+                                       return EXIT_FAILURE;
+                               }
+
+                               mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+                                               dev.allocsz);
+                       }
+
+                       dev.buffer[dev.pos++] = (unsigned char)c;
+               }
+
+               mbfl_memory_device_result(&dev, &string);
+               mbfl_ja_jp_hantozen(&string, &result, mode);
+               hexdump(&result);
+               mbfl_string_clear(&result);
+               mbfl_string_clear(&string);
+       } while (!final);
+
+       return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strcut.c b/ext/mbstring/libmbfl/tests/strcut.c
new file mode 100644 (file)
index 0000000..2d6a873
--- /dev/null
@@ -0,0 +1,113 @@
+/**
+ * Test driver for libmbfl: applies mbfl_strcut() to each line read from stdin and prints the result as a hex dump.
+ * Rui Hirokawa <hirokawa@php.net>
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "mbfl/mbfilter.h"
+
+static void hexdump(const mbfl_string *ptr)
+{
+       unsigned int i;
+
+       for (i = 0; i < ptr->len; i++) {
+               printf("%%%02x", ptr->val[i]);
+       }
+
+       printf(" (%u)\n", ptr->len);
+}
+
+int main(int argc, char **argv)
+{
+       enum mbfl_no_encoding no_encoding;
+       enum mbfl_no_language no_language;
+       mbfl_memory_device dev;
+       mbfl_string string;
+       int offset, length;
+       int final = 0;
+       int state = 0;
+
+       if (argc < 5) {
+               fprintf(stderr, "Usage: %s lang encoding offset length\n", argv[0]);
+               return EXIT_FAILURE;
+       }
+
+       if ((no_language = mbfl_name2no_language(argv[1])) ==
+                       mbfl_no_language_invalid) {
+               printf("Unsupported NLS: %s\n", argv[1]);
+               return EXIT_FAILURE;
+       }
+
+       if ((no_encoding = mbfl_name2no_encoding(argv[2])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[2]);
+               return EXIT_FAILURE;
+       }
+
+       errno = 0;
+       offset = strtol(argv[3], NULL, 10);
+       if (errno) {
+               printf("Invalid offset: %s\n", argv[3]);
+               return EXIT_FAILURE;
+       }
+
+       length = strtol(argv[4], NULL, 10);
+       if (errno) {
+               printf("Invalid length: %s\n", argv[4]);
+               return EXIT_FAILURE;
+       }
+
+
+       do {
+               mbfl_string result;
+
+               mbfl_memory_device_init(&dev, 0, 4096);
+               mbfl_string_init_set(&string, no_language, no_encoding);
+
+               for (;;) {
+                       const int c = fgetc(stdin);
+
+                       if (c == EOF) {
+                               final = 1;
+                               break;
+                       } else if (c == 10) {
+                               if (state == 1) { /* LF completing a CR LF pair: skip it */
+                                       state = 0;
+                                       continue;
+                               }
+                               break;
+                       } else if (c == 13) {
+                               state = 1;
+                               break;
+                       }
+                       state = 0; /* data after a bare CR: the next LF terminates a line again */
+                       if (dev.pos >= dev.length) {
+                               if (dev.length + dev.allocsz < dev.length) {
+                                       printf("Unable to allocate memory\n");
+                                       return EXIT_FAILURE;
+                               }
+
+                               mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+                                               dev.allocsz);
+                       }
+
+                       dev.buffer[dev.pos++] = (unsigned char)c;
+               }
+
+               mbfl_memory_device_result(&dev, &string);
+               if (mbfl_strcut(&string, &result, offset, length)) {
+                       hexdump(&result);
+                       mbfl_string_clear(&result);
+               } else {
+                       printf("***ERROR***\n");
+               }
+               mbfl_string_clear(&string);
+       } while (!final);
+
+       return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am
new file mode 100644 (file)
index 0000000..37713c3
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp
new file mode 100644 (file)
index 0000000..f203bbf
--- /dev/null
@@ -0,0 +1,129 @@
+#!/usr/bin/expect -f
+proc begin_strcut_test {_from _length} {
+    global spawn_id from length
+    set from $_from
+    set length $_length
+
+    spawn tests/strcut Japanese "ISO-2022-JP" $_from $_length
+    set timeout 10
+
+    expect_after {
+        "\[^\r\n\]*\r\n" { fail $test }
+    }
+}
+
+begin_strcut_test -1 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 2 -1
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 3 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "%74%74 (2)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+    -ex " (0)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 5 8
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "%65%73%74 (3)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+sleep 1
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+    -ex "%1b%24%42%25%39%1b%28%42 (8)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 1 15
+
+set test "asciish characters ($from, $length)"
+send "testestestestestes\r"
+expect {
+     "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+    -ex "%1b%24%42%25%46%25%39%25%48%25%46%1b%28%42 (14)\r\n" { pass $test }
+}
+close
+begin_strcut_test 8 20
+
+set test "non-asciish characters (2) ($from, $length)"
+send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
+sleep 1
+expect {
+    -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" {
+        pass $test
+    }
+}
+close
+begin_strcut_test 8 21
+
+set test "non-asciish characters (2) ($from, $length)"
+sleep 1
+send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
+expect {
+    -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" {
+        pass $test
+    }
+}
+close
+begin_strcut_test 11 17
+
+set test "non-asciish characters (2) ($from, $length)"
+sleep 1
+send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
+expect {
+    -ex "%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%1b%28%42 (17)\r\n" {
+        pass $test
+    }
+}
+
+
+# vim: sts=4 sw=4 ts=4 et
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp b/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp
new file mode 100644 (file)
index 0000000..8ad6f95
--- /dev/null
@@ -0,0 +1,91 @@
+#!/usr/bin/expect -f
+# Start a fresh tests/strcut process for EUC-JP with the given offset and
+# length, publish both values as the globals "from" and "length", and
+# install an expect_after fallback that calls "fail $test".
+proc begin_strcut_test {_from _length} {
+    # "timeout" has to be global as well: a plain "set timeout" here would
+    # only create a proc-local variable that top-level expect never reads.
+    global spawn_id from length timeout
+    set from $_from
+    set length $_length
+
+    spawn tests/strcut Japanese EUC-JP $_from $_length
+    set timeout 1
+
+    expect_after {
+        "\[^\r\n\]*\r\n" { fail $test }
+    }
+}
+
+begin_strcut_test -1 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 2 -1
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 3 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "%74%74 (2)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+    -ex "%a5%b9 (2)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 5 8
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "%65%73%74 (3)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+    -ex "%a5%c8%a5%c6%a5%b9%a5%c8 (8)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 1 15
+
+set test "asciish characters ($from, $length)"
+send "testestestestestes\r"
+# Exact-string match, like every other expectation in this file.
+expect {
+    -ex "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+    -ex "%a5%c6%a5%b9%a5%c8%a5%c6%a5%b9%a5%c8 (12)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 sw=4 ts=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp b/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp
new file mode 100644 (file)
index 0000000..5104bf1
--- /dev/null
@@ -0,0 +1,91 @@
+#!/usr/bin/expect -f
+# Start a fresh tests/strcut process for UTF-8 with the given offset and
+# length, publish both values as the globals "from" and "length", and
+# install an expect_after fallback that calls "fail $test".
+proc begin_strcut_test {_from _length} {
+    # "timeout" has to be global as well: a plain "set timeout" here would
+    # only create a proc-local variable that top-level expect never reads.
+    global spawn_id from length timeout
+    set from $_from
+    set length $_length
+
+    spawn tests/strcut Japanese UTF-8 $_from $_length
+    set timeout 1
+
+    expect_after {
+        "\[^\r\n\]*\r\n" { fail $test }
+    }
+}
+
+begin_strcut_test -1 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "テストテスト\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 2 -1
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "テストテスト\r"
+expect {
+    -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 3 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "%74%74 (2)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "テストテスト\r"
+expect {
+    -ex "(0)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 5 8
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+    -ex "%65%73%74 (3)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "テストテスト\r"
+expect {
+    -ex "%e3%82%b9%e3%83%88 (6)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 1 15
+
+set test "asciish characters ($from, $length)"
+send "testestestestestes\r"
+# Exact-string match, like every other expectation in this file.
+expect {
+    -ex "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "テストテスト\r"
+expect {
+    -ex "%e3%83%86%e3%82%b9%e3%83%88%e3%83%86%e3%82%b9 (15)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 sw=4 ts=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
new file mode 100644 (file)
index 0000000..37713c3
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c
new file mode 100644 (file)
index 0000000..9769964
--- /dev/null
@@ -0,0 +1,104 @@
+/**
+ * This is a small sample program that uses libmbfl to convert text between encodings.
+ * Rui Hirokawa <hirokawa@php.net>
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+/* Print each byte of ptr as "%xx", then the byte count in parentheses. */
+static void hexdump(const mbfl_string *ptr)
+{
+       unsigned int idx = 0;
+
+       while (idx < ptr->len) {
+               printf("%%%02x", ptr->val[idx]);
+               idx++;
+       }
+
+       printf(" (%u)\n", ptr->len);
+}
+
+/**
+ * Converts each line read from stdin from from_encoding (argv[3]) to
+ * to_encoding (argv[2]) and prints the converted bytes with hexdump().
+ * argv[1] names the language.  A line is terminated by LF, CR, CR LF or
+ * end of input.
+ *
+ * Returns EXIT_SUCCESS once stdin is exhausted, EXIT_FAILURE on bad
+ * arguments or when the converter or the line buffer cannot be set up.
+ */
+int main(int argc, char **argv)
+{
+       enum mbfl_no_encoding from_encoding, to_encoding;
+       enum mbfl_no_language no_language;
+       mbfl_buffer_converter *convd = NULL;
+       mbfl_memory_device dev;
+       mbfl_string string, result;
+       int final = 0; /* set once EOF has been read */
+       int state = 0; /* 1 while the previous byte was a CR */
+
+       if (argc < 4) {
+               fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]);
+               return EXIT_FAILURE;
+       }
+
+       if ((no_language = mbfl_name2no_language(argv[1])) ==
+                       mbfl_no_language_invalid) {
+               printf("Unsupported NLS: %s\n", argv[1]);
+               return EXIT_FAILURE;
+       }
+
+       if ((to_encoding = mbfl_name2no_encoding(argv[2])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[2]);
+               return EXIT_FAILURE;
+       }
+
+       if ((from_encoding = mbfl_name2no_encoding(argv[3])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[3]);
+               return EXIT_FAILURE;
+       }
+
+       convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+       if (convd == NULL) {
+               printf("Unable to create converter\n");
+               return EXIT_FAILURE;
+       }
+
+       do {
+               mbfl_memory_device_init(&dev, 0, 4096);
+               mbfl_string_init_set(&string, no_language, from_encoding);
+
+               for (;;) {
+                       const int c = fgetc(stdin);
+
+                       if (c == EOF) {
+                               final = 1;
+                               break;
+                       } else if (c == 10) {
+                               if (state == 1) {
+                                       /* LF of a CR LF pair: the CR already ended the line */
+                                       state = 0;
+                                       continue;
+                               }
+                               break;
+                       } else if (c == 13) {
+                               state = 1;
+                               break;
+                       }
+
+                       /*
+                        * Any other byte cancels the pending-CR flag; otherwise a
+                        * later lone LF would be mistaken for the tail of CR LF
+                        * and silently dropped.
+                        */
+                       state = 0;
+
+                       if (dev.pos >= dev.length) {
+                               /* guard against wrap-around of the new buffer size */
+                               if (dev.length + dev.allocsz < dev.length) {
+                                       printf("Unable to allocate memory\n");
+                                       return EXIT_FAILURE;
+                               }
+
+                               mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+                                               dev.allocsz);
+                       }
+
+                       dev.buffer[dev.pos++] = (unsigned char)c;
+               }
+
+               mbfl_memory_device_result(&dev, &string);
+               mbfl_string_init_set(&result, no_language, to_encoding);
+               mbfl_buffer_converter_feed_result(convd, &string, &result);
+               hexdump(&result);
+               mbfl_string_clear(&result);
+               mbfl_string_clear(&string);
+       } while (!final);
+
+       mbfl_buffer_converter_delete(convd);
+
+       return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am
new file mode 100644 (file)
index 0000000..37713c3
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp
new file mode 100644 (file)
index 0000000..0e63ef1
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese CP50220raw eucJP-win
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "81 - 87ku"
+send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r"
+expect {
+    "%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+    "%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    "%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "­Î"
+expect {
+    "%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test }
+}
+
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp
new file mode 100644 (file)
index 0000000..882953f
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS EUC-JP
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+    "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+    "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "­Î"
+expect {
+    "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp
new file mode 100644 (file)
index 0000000..e51b5e4
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS UTF-8
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+    "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "日本語テスト\r"
+expect {
+    "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+# The input must be the full-width digits: the expected output is their
+# two-byte Shift_JIS form (0x824f-0x8258, 20 bytes), not ASCII.
+set test "full-width numerics"
+send "０１２３４５６７８９\r"
+expect {
+    "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+# The line must end with CR like every other send, otherwise the driver
+# never sees a complete line; the expected output is a single '?' (0x3f).
+set test "full-width numerics"
+send "㍊\r"
+expect {
+    "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c
new file mode 100644 (file)
index 0000000..c12fdc2
--- /dev/null
@@ -0,0 +1,147 @@
+/**
+ * This is a small sample program that uses libmbfl for Japanese kana/width conversion.
+ * Rui Hirokawa <hirokawa@php.net>
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+/* Print each byte of ptr as "%xx", then the byte count in parentheses. */
+static void hexdump(const mbfl_string *ptr)
+{
+       unsigned int idx = 0;
+
+       while (idx < ptr->len) {
+               printf("%%%02x", ptr->val[idx]);
+               idx++;
+       }
+
+       printf(" (%u)\n", ptr->len);
+}
+
+/**
+ * Reads lines from stdin in the encoding named by argv[1], passes each one
+ * through mbfl_ja_jp_hantozen() with the mode bits selected by the flag
+ * letters in argv[2], and prints the result with hexdump().  A line is
+ * terminated by LF, CR, CR LF or end of input.
+ *
+ * Returns EXIT_SUCCESS once stdin is exhausted, EXIT_FAILURE on bad
+ * arguments or when the line buffer cannot grow.
+ */
+int main(int argc, char **argv)
+{
+       enum mbfl_no_encoding no_enc;
+       const enum mbfl_no_language no_lang = mbfl_no_language_japanese;
+       mbfl_memory_device dev;
+       mbfl_string string, result;
+       int final = 0; /* set once EOF has been read */
+       int state = 0; /* 1 while the previous byte was a CR */
+       int mode = 0;  /* bit mask handed to mbfl_ja_jp_hantozen() */
+
+       if (argc < 3) {
+               fprintf(stderr, "Usage: %s encoding flags\n", argv[0]);
+               return EXIT_FAILURE;
+       }
+
+       if ((no_enc = mbfl_name2no_encoding(argv[1])) ==
+                       mbfl_no_encoding_invalid) {
+               printf("Unsupported encoding: %s\n", argv[1]);
+               return EXIT_FAILURE;
+       }
+
+       {
+               const char *p;
+
+               /* Walk the flag string back to front; unknown letters are ignored. */
+               for (p= argv[2] + strlen(argv[2]); p > argv[2]; ) {
+                       switch (*(--p)) {
+                       case 'A':
+                               mode |= 0x1;
+                               break;
+                       case 'a':
+                               mode |= 0x10;
+                               break;
+                       case 'R':
+                               mode |= 0x2;
+                               break;
+                       case 'r':
+                               mode |= 0x20;
+                               break;
+                       case 'N':
+                               mode |= 0x4;
+                               break;
+                       case 'n':
+                               mode |= 0x40;
+                               break;
+                       case 'S':
+                               mode |= 0x8;
+                               break;
+                       case 's':
+                               mode |= 0x80;
+                               break;
+                       case 'K':
+                               mode |= 0x100;
+                               break;
+                       case 'k':
+                               mode |= 0x1000;
+                               break;
+                       case 'H':
+                               mode |= 0x200;
+                               break;
+                       case 'h':
+                               mode |= 0x2000;
+                               break;
+                       case 'V':
+                               mode |= 0x800;
+                               break;
+                       case 'C':
+                               mode |= 0x10000;
+                               break;
+                       case 'c':
+                               mode |= 0x20000;
+                               break;
+                       case 'M':
+                               mode |= 0x100000;
+                               break;
+                       case 'm':
+                               mode |= 0x200000;
+                               break;
+                       }
+               }
+       }
+
+       do {
+               mbfl_memory_device_init(&dev, 0, 4096);
+               mbfl_string_init_set(&string, no_lang, no_enc);
+
+               for (;;) {
+                       const int c = fgetc(stdin);
+
+                       if (c == EOF) {
+                               final = 1;
+                               break;
+                       } else if (c == 10) {
+                               if (state == 1) {
+                                       /* LF of a CR LF pair: the CR already ended the line */
+                                       state = 0;
+                                       continue;
+                               }
+                               break;
+                       } else if (c == 13) {
+                               state = 1;
+                               break;
+                       }
+
+                       /*
+                        * Any other byte cancels the pending-CR flag; otherwise a
+                        * later lone LF would be mistaken for the tail of CR LF
+                        * and silently dropped.
+                        */
+                       state = 0;
+
+                       if (dev.pos >= dev.length) {
+                               /* guard against wrap-around of the new buffer size */
+                               if (dev.length + dev.allocsz < dev.length) {
+                                       printf("Unable to allocate memory\n");
+                                       return EXIT_FAILURE;
+                               }
+
+                               mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+                                               dev.allocsz);
+                       }
+
+                       dev.buffer[dev.pos++] = (unsigned char)c;
+               }
+
+               mbfl_memory_device_result(&dev, &string);
+               /* Start from an empty result so a failed conversion dumps "(0)"
+                * instead of reading an uninitialised structure. */
+               mbfl_string_init_set(&result, no_lang, no_enc);
+               mbfl_ja_jp_hantozen(&string, &result, mode);
+               hexdump(&result);
+               mbfl_string_clear(&result);
+               mbfl_string_clear(&string);
+       } while (!final);
+
+       return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am
new file mode 100644 (file)
index 0000000..37713c3
--- /dev/null
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp
new file mode 100644 (file)
index 0000000..a6459fe
--- /dev/null
@@ -0,0 +1,1098 @@
+#!/usr/bin/expect -f
+
+set timeout 1
+
+set test "full-width alphabets to half-width counterparts"
+spawn tests/conv_kana EUC-JP "r"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+# Ten two-byte digits pass through unchanged, starting with %a3%b0 (20 bytes).
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width alphabets to full-width counterparts"
+spawn tests/conv_kana EUC-JP "R"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+# Ten two-byte digits pass through unchanged, starting with %a3%b0 (20 bytes).
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width alphabets to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "Rr"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+# Ten two-byte digits pass through unchanged, starting with %a3%b0 (20 bytes).
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width numerics to half-width counterparts"
+spawn tests/conv_kana EUC-JP "n"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width numerics to half-width counterparts"
+spawn tests/conv_kana EUC-JP "n"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width numerics to full-width counterparts"
+spawn tests/conv_kana EUC-JP "N"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+# Ten ASCII digits become ten two-byte digits, starting with %a3%b0 (20 bytes).
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+# Ten two-byte digits pass through unchanged, starting with %a3%b0 (20 bytes).
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width numerics to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "nN"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+# Half-width katakana input as EUC-JP byte pairs led by 0x8e, spelled with hex escapes.
+send "\x8e\xb1\x8e\xb2\x8e\xb3\x8e\xb4\x8e\xb5\x8e\xb6\x8e\xde\x8e\xb7\x8e\xde\x8e\xb8\x8e\xde\x8e\xb9\x8e\xde\x8e\xba\x8e\xde\x8e\xca\x8e\xdf\x8e\xcb\x8e\xdf\x8e\xcc\x8e\xdf\x8e\xcd\x8e\xdf\x8e\xce\x8e\xdf\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width alphanumerics to half-width counterparts"
+spawn tests/conv_kana EUC-JP "a"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width alphanumerics to full-width counterparts"
+spawn tests/conv_kana EUC-JP "A"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width alphanumerics to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "Aa"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width spaces to half-width counterparts"
+spawn tests/conv_kana EUC-JP "s"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%20%20 (2)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width spaces to full-width counterparts"
+spawn tests/conv_kana EUC-JP "S"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%a1%a1 (4)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width spaces to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "Ss"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%20%a1%a1 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width katakanas to half-width counterparts"
+spawn tests/conv_kana EUC-JP "k"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%8e%de%a4%ad%8e%de%a4%af%8e%de%a4%b1%8e%de%a4%b3%8e%de%a4%cf%8e%df%a4%d2%8e%df%a4%d5%8e%df%a4%d8%8e%df%a4%db%8e%df (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width counterparts"
+spawn tests/conv_kana EUC-JP "K"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width katakanas to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "kK"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%8e%de%a4%ad%8e%de%a4%af%8e%de%a4%b1%8e%de%a4%b3%8e%de%a4%cf%8e%df%a4%d2%8e%df%a4%d5%8e%df%a4%d8%8e%df%a4%db%8e%df (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width hiraganas to half-width katakana"
+spawn tests/conv_kana EUC-JP "h"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%8e%de%a5%ad%8e%de%a5%af%8e%de%a5%b1%8e%de%a5%b3%8e%de%a5%cf%8e%df%a5%d2%8e%df%a5%d5%8e%df%a5%d8%8e%df%a5%db%8e%df (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width hiragana"
+spawn tests/conv_kana EUC-JP "H"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width katakanas to full-width hiraganas and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "hH"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%8e%de%a5%ad%8e%de%a5%af%8e%de%a5%b1%8e%de%a5%b3%8e%de%a5%cf%8e%df%a5%d2%8e%df%a5%d5%8e%df%a5%d8%8e%df%a5%db%8e%df (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+close
+
+set test "full-width katakanas to full-width hiragana" 
+spawn tests/conv_kana EUC-JP "c"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width hiraganas to full-width katakana"
+spawn tests/conv_kana EUC-JP "C"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# all ten full-width digits (20 bytes); -ex is unanchored, so a pattern starting at %a3%b1 would never check the first digit
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "conversion between full-width hiraganas and full-width katakanas, flags for both directions are specified at a time"
+spawn tests/conv_kana EUC-JP "Cc"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e¶\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width katakanas, with voiced marks combined"
+spawn tests/conv_kana EUC-JP "KV"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e¶\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd (30)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width hiraganas, with voiced marks combined"
+spawn tests/conv_kana EUC-JP "HV"
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+    -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+    -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+    -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+    -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+    -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+    -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+    -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+    -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "\8e±\8e²\8e³\8e´\8eµ\8e¶\8eÞ\8e·\8eÞ\8e¸\8eÞ\8e¹\8eÞ\8eº\8eÞ\8eÊ\8eß\8eË\8eß\8eÌ\8eß\8eÍ\8eß\8eÎ\8eß\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd (30)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+    -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp
new file mode 100644 (file)
index 0000000..09f518c
--- /dev/null
@@ -0,0 +1,47 @@
+#!/usr/bin/expect -f
+spawn tests/strwidth Japanese UTF-8
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+    "8\r\n" { pass $test }
+}
+
+set test "CJK kanji"
+send "漢字\r"
+expect {
+    "4\r\n" { pass $test }
+}
+
+set test "CJK hiragana"
+send "ひらがな\r"
+expect {
+    "8\r\n" { pass $test }
+}
+
+set test "CJK katakana"
+send "カタカナ\r"
+expect {
+    "8\r\n" { pass $test }
+}
+
+set test "Fullwidth symbols (1)"
+send "〜！”＃＄％＆’（））\r"
+expect {
+    "20\r\n" { pass $test }
+}
+
+set test "Halfwidth symbols assumed to be fullwidth in JISX0208 (2)"
+send "○●◎\r"
+expect {
+    "3\r\n" { pass $test }
+}
+
+close
+
+# vim: sts=4 sw=4 ts=4 et encoding=UTF-8
index e7e5c2abe5f0576287c06a4affdee7ab452998fb..c73b9d8f936ca5defee1ad34b9af3e137012d4c5 100644 (file)
@@ -151,9 +151,16 @@ static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
+       mbfl_no_encoding_cp1254,
        mbfl_no_encoding_8859_9
 };
 
+static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
+       mbfl_no_encoding_ascii,
+       mbfl_no_encoding_utf8,
+       mbfl_no_encoding_koi8u
+};
+
 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8
@@ -168,6 +175,7 @@ static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
        { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
        { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
        { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
+       { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
        { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
 };
 
@@ -360,7 +368,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
        ZEND_ARG_INFO(0, strict)
 ZEND_END_ARG_INFO()
 
-ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_list_encodings, 0, 0, 0)
+ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
 ZEND_END_ARG_INFO()
 
 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
@@ -1035,75 +1043,72 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
 {
        enum mbfl_no_encoding no_encoding;
-       const char *enc_name = NULL;
-       uint enc_name_len = 0;
-  
-       no_encoding = new_value ? mbfl_name2no_encoding(new_value):
-                               mbfl_no_encoding_invalid;
+       const char *enc_name = NULL;
+       uint enc_name_len = 0;
+   
+       no_encoding = new_value ? mbfl_name2no_encoding(new_value):
+                               mbfl_no_encoding_invalid;
        if (no_encoding != mbfl_no_encoding_invalid) {
-               enc_name = new_value;
-               enc_name_len = new_value_length;
-       } else {
-               switch (MBSTRG(language)) {
-                       case mbfl_no_language_uni:
-                               enc_name = "UTF-8";
-                               enc_name_len = sizeof("UTF-8") - 1;
-                               break;
-                       case mbfl_no_language_japanese:
-                               enc_name = "EUC-JP";
-                               enc_name_len = sizeof("EUC-JP") - 1;
-                               break;
-                       case mbfl_no_language_korean:
-                               enc_name = "EUC-KR";
-                               enc_name_len = sizeof("EUC-KR") - 1;
-                               break;
-                       case mbfl_no_language_simplified_chinese:
-                               enc_name = "EUC-CN";
-                               enc_name_len = sizeof("EUC-CN") - 1;
-                               break;
-                       case mbfl_no_language_traditional_chinese:
-                               enc_name = "EUC-TW";
-                               enc_name_len = sizeof("EUC-TW") - 1;
-                               break;
-                       case mbfl_no_language_russian:
-                               enc_name = "KOI8-R";
-                               enc_name_len = sizeof("KOI8-R") - 1;
-                               break;
-                       case mbfl_no_language_german:
-                               enc_name = "ISO-8859-15";
-                               enc_name_len = sizeof("ISO-8859-15") - 1;
-                               break;
-                       case mbfl_no_language_armenian:
-                               enc_name = "ArmSCII-8";
-                               enc_name_len = sizeof("ArmSCII-8") - 1;
-                               break;
-                       case mbfl_no_language_turkish:
-                               enc_name = "ISO-8859-9";
-                               enc_name_len = sizeof("ISO-8859-9") - 1;
-                               break;
-                       default:
-                               enc_name = "ISO-8859-1";
-                               enc_name_len = sizeof("ISO-8859-1") - 1;
-                               break;
-               }
-               no_encoding = mbfl_name2no_encoding(enc_name);
-       }
-       MBSTRG(internal_encoding) = no_encoding;
-       MBSTRG(current_internal_encoding) = no_encoding;
+               enc_name = new_value;
+               enc_name_len = new_value_length;
+       } else {
+               switch (MBSTRG(language)) {
+                       case mbfl_no_language_uni:
+                               enc_name = "UTF-8";
+                               enc_name_len = sizeof("UTF-8") - 1;
+                               break;
+                       case mbfl_no_language_japanese:
+                               enc_name = "EUC-JP";
+                               enc_name_len = sizeof("EUC-JP") - 1;
+                               break;
+                       case mbfl_no_language_korean:
+                               enc_name = "EUC-KR";
+                               enc_name_len = sizeof("EUC-KR") - 1;
+                               break;
+                       case mbfl_no_language_simplified_chinese:
+                               enc_name = "EUC-CN";
+                               enc_name_len = sizeof("EUC-CN") - 1;
+                               break;
+                       case mbfl_no_language_traditional_chinese:
+                               enc_name = "EUC-TW";
+                               enc_name_len = sizeof("EUC-TW") - 1;
+                               break;
+                       case mbfl_no_language_russian:
+                               enc_name = "KOI8-R";
+                               enc_name_len = sizeof("KOI8-R") - 1;
+                               break;
+                       case mbfl_no_language_german:
+                               enc_name = "ISO-8859-15";
+                               enc_name_len = sizeof("ISO-8859-15") - 1;
+                               break;
+                       case mbfl_no_language_armenian:
+                               enc_name = "ArmSCII-8";
+                               enc_name_len = sizeof("ArmSCII-8") - 1;
+                               break;
+                       case mbfl_no_language_turkish:
+                               enc_name = "ISO-8859-9";
+                               enc_name_len = sizeof("ISO-8859-9") - 1;
+                               break;
+                       default:
+                               enc_name = "ISO-8859-1";
+                               enc_name_len = sizeof("ISO-8859-1") - 1;
+                               break;
+               }
+               no_encoding = mbfl_name2no_encoding(enc_name);
+       }
+       MBSTRG(internal_encoding) = no_encoding;
+       MBSTRG(current_internal_encoding) = no_encoding;
 #if HAVE_MBREGEX
        {
-               const char *_enc_name = enc_name;
-               if (FAILURE == php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC)) {
+               const char *enc_name = new_value;
+               if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
                        /* falls back to EUC-JP if an unknown encoding name is given */
-                       _enc_name = "EUC-JP";
-                       php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC);
+                       enc_name = "EUC-JP";
+                       php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
                }
-               php_mb_regex_set_mbctype(_enc_name TSRMLS_CC);
+               php_mb_regex_set_mbctype(new_value TSRMLS_CC);
        }
 #endif
-#ifdef ZEND_MULTIBYTE
-       zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC);
-#endif /* ZEND_MULTIBYTE */
        return SUCCESS;
 }
 /* }}} */
@@ -1253,28 +1258,22 @@ PHP_INI_BEGIN()
        PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
 #endif /* ZEND_MULTIBYTE */
        PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
-       STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
-               PHP_INI_SYSTEM | PHP_INI_PERDIR,
-               OnUpdateLong,
-               func_overload,
-               zend_mbstring_globals, mbstring_globals)
-                                                                                 
+       STD_PHP_INI_ENTRY("mbstring.func_overload", "0", 
+       PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
+
        STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
                PHP_INI_SYSTEM | PHP_INI_PERDIR,
                OnUpdate_mbstring_encoding_translation, 
-               encoding_translation,
-               zend_mbstring_globals, mbstring_globals)
-
+               encoding_translation, zend_mbstring_globals, mbstring_globals)                                   
        PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
-               "^(text/|application/xhtml\\+xml)",
-               PHP_INI_ALL,
-               OnUpdate_mbstring_http_output_conv_mimetypes)
+               "^(text/|application/xhtml\\+xml)",
+               PHP_INI_ALL,
+               OnUpdate_mbstring_http_output_conv_mimetypes)
 
        STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
                PHP_INI_ALL,
                OnUpdateLong,
-               strict_detection,
-               zend_mbstring_globals, mbstring_globals)
+               strict_detection, zend_mbstring_globals, mbstring_globals)
 PHP_INI_END()
 /* }}} */
 
@@ -1333,11 +1332,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring)
        if (mbstring_globals->detect_order_list) {
                free(mbstring_globals->detect_order_list);
        }
-
        if (mbstring_globals->http_output_conv_mimetypes) {
                _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
        }
-
 #if HAVE_MBREGEX
        php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
 #endif
@@ -1450,6 +1447,7 @@ PHP_RINIT_FUNCTION(mbstring)
        PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
 #endif
 #ifdef ZEND_MULTIBYTE
+       zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
        php_mb_set_zend_encoding(TSRMLS_C);
 #endif /* ZEND_MULTIBYTE */
 
@@ -1578,7 +1576,7 @@ PHP_FUNCTION(mb_internal_encoding)
 #ifdef ZEND_MULTIBYTE
                        /* TODO: make independent from mbstring.encoding_translation? */
                        if (MBSTRG(encoding_translation)) {
-                               zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
+                               zend_multibyte_set_internal_encoding(name TSRMLS_CC);
                        }
 #endif /* ZEND_MULTIBYTE */
                        RETURN_TRUE;
@@ -3071,18 +3069,13 @@ PHP_FUNCTION(mb_detect_encoding)
 /* }}} */
 
 /* {{{ proto mixed mb_list_encodings()
-   Returns an array of all supported entity encodings or Returns the entity encoding as a string */
+   Returns an array of all supported entity encodings */
 PHP_FUNCTION(mb_list_encodings)
 {
        const mbfl_encoding **encodings;
        const mbfl_encoding *encoding;
        int i;
 
-       if (ZEND_NUM_ARGS() != 0) {
-               RETVAL_FALSE;
-               ZEND_WRONG_PARAM_COUNT();
-       }
-
        array_init(return_value);
        i = 0;
        encodings = mbfl_get_supported_encodings();
@@ -3319,8 +3312,8 @@ PHP_FUNCTION(mb_convert_variables)
        int n, to_enc_len, argc, stack_level, stack_max, elistsz;
        enum mbfl_no_encoding *elist;
        char *name, *to_enc;
-       void *ptmp;
-
+       void *ptmp;     
+       
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
                return;
        }
@@ -3485,7 +3478,7 @@ detect_end:
                                                        ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
                                                        if (ret != NULL) {
                                                                if (Z_REFCOUNT_PP(hash_entry) > 1) {
-                                                                       Z_DELREF_P(*hash_entry);
+                                                                       Z_DELREF_PP(hash_entry);
                                                                        MAKE_STD_ZVAL(*hash_entry);
                                                                } else {
                                                                        zval_dtor(*hash_entry);
@@ -3864,7 +3857,7 @@ PHP_FUNCTION(mb_send_mail)
        smart_str *s;
        extern void mbfl_memory_device_unput(mbfl_memory_device *device);
        char *pp, *ee;
-
+    
        /* initialize */
        mbfl_memory_device_init(&device, 0, 0);
        mbfl_string_init(&orig_str);
@@ -4501,8 +4494,7 @@ MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
 /* }}} */
 
 /*     {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
-MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from 
-               TSRMLS_DC)
+MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
 {
        int i;
        mbfl_string string, result, *ret = NULL;
@@ -4722,8 +4714,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
 /* }}} */
 
 #ifdef ZEND_MULTIBYTE
-/* {{{ MBSTRING_API int php_mb_set_zend_encoding() */
-MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
+
+/* {{{ php_mb_set_zend_encoding() */
+static int php_mb_set_zend_encoding(TSRMLS_D)
 {
        /* 'd better use mbfl_memory_device? */
        char *name, *list = NULL;
@@ -4763,7 +4756,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
        if (MBSTRG(encoding_translation)) {
                /* notify internal encoding to Zend Engine */
                name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
-               zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
+               zend_multibyte_set_internal_encoding(name TSRMLS_CC);
        }
 
        zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
@@ -4775,7 +4768,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
 /* {{{ char *php_mb_encoding_detector()
  * Interface for Zend Engine
  */
-char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
+static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
 {
        mbfl_string string;
        const char *ret;
@@ -4798,7 +4791,7 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
 
        mbfl_string_init(&string);
        string.no_language = MBSTRG(language);
-       string.val = (char*)arg_string;
+       string.val = (unsigned char *)arg_string;
        string.len = arg_length;
        ret = mbfl_identify_encoding_name(&string, elist, size, 0);
        if (list != NULL) {
@@ -4813,9 +4806,9 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
 /* }}} */
 
 /*     {{{ int php_mb_encoding_converter() */
-int php_mb_encoding_converter(char **to, int *to_length, const char *from,
-               int from_length, const char *encoding_to, const char *encoding_from 
-               TSRMLS_DC)
+static int php_mb_encoding_converter(unsigned char **to, size_t *to_length,
+               const unsigned char *from, size_t from_length,
+               const char *encoding_to, const char *encoding_from TSRMLS_DC)
 {
        mbfl_string string, result, *ret;
        enum mbfl_no_encoding from_encoding, to_encoding;
@@ -4836,7 +4829,7 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from,
        mbfl_string_init(&result);
        string.no_encoding = from_encoding;
        string.no_language = MBSTRG(language);
-       string.val = (char*)from;
+       string.val = (unsigned char*)from;
        string.len = from_length;
 
        /* initialize converter */
@@ -4865,14 +4858,14 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from,
  *     returns number of odd (e.g. appears only first byte of multibyte
  *     character) chars
  */
-int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC)
+static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
 {
        mbfl_string mb_string;
 
        mbfl_string_init(&mb_string);
        mb_string.no_language = MBSTRG(language);
        mb_string.no_encoding = mbfl_name2no_encoding(encoding);
-       mb_string.val = (char*)string;
+       mb_string.val = (unsigned char *)string;
        mb_string.len = length;
 
        if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
index cd6cc63b3fbb3e95dfd608fadeebf07da5512a7e..c536183538815a159bef1e48b7025cc951432544 100644 (file)
@@ -217,16 +217,6 @@ struct mb_overload_def {
 #define MBSTRG(v) (mbstring_globals.v)
 #endif
 
-#ifdef ZEND_MULTIBYTE
-MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D);
-char* php_mb_encoding_detector(const char *string, int length, char *list
-               TSRMLS_DC);
-int php_mb_encoding_converter(char **to, int *to_length, const char *from,
-               int from_length, const char *encoding_to, const char *encoding_from
-               TSRMLS_DC);
-int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC);
-#endif /* ZEND_MULTIBYTE */
-
 #else  /* HAVE_MBSTRING */
 
 #define mbstring_module_ptr NULL
index ed3fa53b25370a8d49490907a5db2c6c3a24498b..4d321bb93bb68221509b97afeea2d21546b8e54b 100644 (file)
@@ -1,4 +1,4 @@
-OniGuruma LICENSE
+Oniguruma LICENSE
 -----------------
 
 When this software is partly used or it is distributed with Ruby, 
@@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby.
 It follows the BSD license in the case of the one except for it.
 
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
index c648c5455117e991a265e5edd942aeabb19a399b..a1debefa490b509fc7b8c11ed616474fae4d5366 100644 (file)
@@ -1,5 +1,457 @@
 History
 
+2007/08/16: Version 4.7.1
+
+2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/07/04: [spec] (thanks K.Takata)
+                   ONIG_OPTION_SINGLELINE: '$' -> '\Z'  (as Perl)
+2007/07/04: [dist] (thanks K.Takata)
+                   fix documents API and API.ja.
+
+2007/06/18: Version 4.7.0
+
+2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/06/18: [bug]  (thanks KUBO Takehiro)
+                   WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint).
+2007/06/05: [impl] add #ifndef vsnprintf in regint.h.
+2007/06/05: [bug]  should check USE_CRNL_AS_LINE_TERMINATOR case
+                   in onig_search().
+
+2007/04/12: Version 4.6.2
+
+2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000
+                   to 0x4000.
+2007/03/26: [impl] add 'void' to function declarations.
+
+2007/03/06: Version 4.6.1
+
+2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2007/03/06: [bug]  add #include <malloc.h> for bcc32.
+                   (In bcc32, alloca() is declared in malloc.h.)
+2007/03/06: [impl] remove including version.h of Ruby.
+2007/03/02: [bug]  invalid optimization for semi-end-buf in onig_search().
+                   ex. /\n\Z/.match("aaaaaaaaaa\n")
+2007/03/02: [impl] move range > start check position in end_buf process.
+
+2007/02/08: Version 4.6.0
+
+2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2007/01/09: [tune] select_opt_exact_info() didn't work for empty info.
+                   ex. /.a/ make MAP info instead of EXACT info.
+2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode.
+2006/12/22: [spec] should check too short multibyte char in parse_exp().
+                   add USE_PAD_TO_SHORT_BYTE_CHAR.
+                   ex. /\x00/ in UTF16 should be error.
+
+2006/11/17: Version 4.5.1
+
+2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/15: [impl] remove CHECK_INTERRUPT.
+2006/11/10: [bug]  0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e
+                   should be [:punct:].
+2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER.
+2006/11/07: [bug]  (thanks Byte)
+                   add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R.
+
+2006/11/06: Version 4.5.0
+
+2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/06: [API]  remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND.
+2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of 
+                   the string range.
+                   add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE.
+
+2006/10/30: Version 4.4.6
+
+2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/10/30: [impl] (thanks K.Takata)
+                   add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END.
+2006/10/30: [bug]  (thanks Wolfgang Nadasi-Donner)
+                   invalid offset value was used in STATE_CHECK_BUFF_INIT().
+
+2006/10/24: Version 4.4.5
+
+2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/10/24: [impl] escape -Wall warning.
+2006/10/24: [tune] (thanks Kornelius Kalnbach)
+                   String#scan for long string needs long time compare with
+                   old Ruby
+                   by initialization time for combination explosion check
+                   ex. ("test " * 100_000).scan(/\w*\s?/)
+                   change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000.
+                   reduce initialization area of state_check_buff.
+2006/10/16: [bug]  (thanks Akinori Musha)
+                   first argument of rb_warn() should be format string.
+2006/10/10: [impl] add msa.state_check_buff_size initialization
+                   in onig_search().
+2006/10/10: [bug]  should call onig_st_free_table() in
+                   onig_free_shared_cclass_table().
+2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB.
+2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT().
+                   make valgrind happy.
+2006/09/22: [impl] convert to ascii for parameter string in
+                   onig_error_code_to_str().
+                   add enc member into OnigErrorInfo.
+
+2006/09/19: Version 4.4.4
+
+2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/19: [impl] (thanks KOYAMA Tetsuji)
+                   HAVE_STDARG_PROTOTYPES was not defined in Mac OS X
+                   by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc...
+
+2006/09/15: Version 4.4.3
+
+2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/15: [bug]  (thanks Allan Odgaard)
+                   out of range access in bm_search_notrev().
+                   (p < s)
+
+2006/09/08: Version 4.4.2
+
+2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/08: [bug]  (thanks K.Takata)
+                   out of range access in bm_search_notrev().
+2006/09/04: [spec] (thanks K.Takata)
+                   allow look-behind in negative look-behind.
+                   ex. /(?<!(?<=a)b|c)d/
+
+2006/08/29: Version 4.4.1
+
+2006/08/29: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/29: [dist] (thanks Seiji Masugata)
+                    add configure option --enable-combination-explosion-check
+
+2006/08/25: Version 4.4.0
+
+2006/08/25: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/08/25: [impl] add_state_check_num() should be enclosed in
+                   ifdef USE_COMBINATION_EXPLOSION_CHECK.
+2006/08/23: [spec] config USE_COMBINATION_EXPLOSION_CHECK is enabled
+                   in Ruby mode only.
+2006/08/22: [impl] remove last line comma in enum OpCode.
+2006/08/22: [impl] remove OP_STATE_CHECK_ANYCHAR_STAR_PEEK_NEXT and
+                   OP_STATE_CHECK_ANYCHAR_ML_STAR_PEEK_NEXT.
+2006/08/22: [impl] remove OP_BACKREF3.
+
+2006/08/21: Version 4.3.1
+
+2006/08/21: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/21: [impl] change stack type values
+                   and re-define STK_MASK_TO_VOID_TARGET etc...
+2006/08/21: [impl] set repeat_range[].upper to 0x7fffffff as infinite.
+2006/08/21: [impl] add STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE.
+2006/08/21: [impl] reduce (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n}
+2006/08/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
+                   if backreference is not used.
+2006/08/17: [bug]  should check scan_env.num_call > 0 for backrefed pattern
+                   in combination explosion check.
+
+2006/08/17: Version 4.3.0
+
+2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/17: [new]  add config USE_COMBINATION_EXPLOSION_CHECK.
+                   check /(.+)*/, /(\s*foo\s*)*/ etc...
+            [API]  add num_comb_exp_check member in regex_t.
+            [dist] change LTVERSION value to "1:0:0" in configure.in.
+2006/08/15: [bug]  OP_REPEAT_INC process in match_at().
+                   should check repeat-count >= range-upper and
+                   range-upper may be infinite.
+
+2006/08/11: Version 4.2.3
+
+2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/10: [impl] remove double call in set_qualifier().
+2006/08/10: [impl] remove by_number member in QualifierNode.
+2006/08/09: [impl] remove a comma at the end of enum ReduceType
+                   for escape warning on Mac OS X.
+2006/08/07: [impl] remove warning in regcomp.c.
+2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY.
+
+2006/08/03: Version 4.2.2
+
+2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/03: [bug]  (thanks Hiroyuki Yamamoto)
+                   segmentation fault in regexec(). (POSIX API)
+2006/08/02: [bug]  combination of \G in look-ahead/look-behind and other
+                   anchors(\A, \z, \Z) cause invalid result.
+                   ex. /(?!\G)a\z/.match("ba")
+                   start arg. of MATCH_ARG_INIT() should be original
+                   arg. of onig_search().
+
+2006/07/31: Version 4.2.1
+
+2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/07/31: [bug]  (thanks Kimura Minoru)
+                   re-implement bm_search_notrev().
+2006/07/31: [impl] bm_search_notrev() refactoring.
+2006/07/31: [bug]  (thanks Kimura Minoru)
+                   fix incomplete multibyte string in exact info.
+2006/07/31: [impl] (thanks Seiji Masugata)
+                   remove cast in va_init_list() for Intel C Compiler.
+
+2006/07/18: Version 4.2.0
+
+2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/18: [new]  (thanks Wolfgang Nadasi-Donner)
+                   add back reference with nest level.
+                   \k<name+n>, \k<name-n>
+2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX
+                   and ONIG_SYN_XXX number literals.
+
+2006/07/03: Version 4.1.2
+
+2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner)
+                   allow \G in look-behind.
+                   add ANCHOR_BEGIN_POSITION flag in setup_tree().
+2006/06/12: [impl] (thanks matz)
+                    fix cast from char* to const char*
+                    in onig_snprintf_with_pattern().
+                    fix cast from char* to const char*
+                    for PopularQStr[] and ReduceQStr[].
+
+2006/05/22: Version 4.1.1
+
+2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/22: [impl] add position string argument to STACK_BASE_CHECK().
+2006/05/22: [bug]  (thanks NARUSE, Yui)
+                   add STK_NULL_CHECK_END to IS_TO_VOID_TARGET().
+                   ex. core dump in
+                   /(?<pare>\(([^\(\)]++|\g<pare>)*+\))/.match('((a))')
+
+2006/05/15: Version 4.1.0
+
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [impl] thread atomic changes for onig_end() and
+                   onig_free_node_list().
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [dist] update API, API.ja, FAQ, FAQ.ja.
+2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe()
+                   and re_recompile_pattern().
+                   add config USE_RECOMPILE_API.
+2006/05/15: [impl] improved thread safe implementation of onig_search()
+                   and onig_match().
+
+2006/05/11: Version 4.0.4
+
+2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/11: [bug]  (thanks Yuji Kaneda)
+                   dead-lock in onig_end().              
+2006/05/11: [dist] update index.html.
+
+2006/05/08: Version 4.0.3
+
+2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/08: [bug]  (thanks Allan Odgaard)
+                   Segmentation fault in backward search.
+                   ex. /^\t.*$/
+2006/04/18: [dist] update index.html.
+2006/04/05: [dist] update index.html.
+2006/03/24: [dist] update doc/RE, doc/RE.ja.
+
+2006/03/23: Version 4.0.2
+
+2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP
+                   and ONIG_OPTION_CAPTURE_GROUP check.
+2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS.
+2006/03/22: [impl] remove USE_NAMED_GROUP condition from
+                   ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect().
+2006/03/22: [new]  add API onig_noname_group_capture_is_active().
+2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType.
+                   add typedef OnigRegexType regex_t
+                   unless ONIG_ESCAPE_REGEX_T_COLLISION is defined.
+2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000
+                   to 10000.  (for docdiff program)
+2006/02/17: [dist] change COPYING year 2005 -> 2006.
+
+2006/02/07: Version 4.0.1
+
+2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/02/07: [bug]  memory leaks in onig_free_shared_cclass_table().
+2006/02/03: [ruby] add -m 0644 option to install command in "make 19".
+2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML.
+                   change from IS_POSIXLINE() to IS_MULTILINE()
+                   for ANCHOR_ANYCHAR_START/_ML decision
+                   in optimize_node_left().
+2006/01/26: [dist] update index.html for Oniguruma 2.5.3.
+2006/01/25: [dist] update URL in index.html.
+
+2006/01/24: Version 4.0.0
+
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin].
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/01/24: [dist] remove warnings from sample/encode.c.
+2006/01/24: [dist] change install description in README(.ja).
+2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS.
+2006/01/24: [dist] --- support shared library ---
+                   use GNU libtool/automake.
+                   change configure.in and add Makefile.am, sample/Makefile.am.
+                   add AUTHORS file.
+2006/01/24: [dist] test programs return exit code -1 when test fails. 
+2006/01/24: [bug]  (thanks KIMURA Koichi)
+                   invalid syntax definition in ONIG_SYNTAX_GREP.
+                   ONIG_SYN_OP_BRACE_INTERVAL
+                   -> ONIG_SYN_OP_ESC_BRACE_INTERVAL
+2006/01/23: [dist] fix configure.in for onig-config.
+2006/01/19: [new]  add new config USE_UNICODE_ALL_LINE_TERMINATORS.
+                   (U+000d, U+0085, U+2028, U+2029)
+2005/12/29: [dist] change pmatch array size to 25 in testconv.rb.
+2005/12/26: [dist] fix name in test.rb.
+2005/12/26: [dist] update index.html for 2.5.1.
+
+2005/11/29: Version 3.9.1
+
+2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/21: [bug]  (thanks Allan Odgaard)
+                   utf-8 character comments in extended mode leads 
+                   invalid result.
+                   ex. /(?x)(?<= # <any-utf-8 multibyte char>o\n~) /
+                   fix onigenc_unicode_is_code_ctype() and
+                   utf8_is_code_ctype().
+2005/11/20: [bug]  (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe)
+                   begin-line anchor and BM search optimization leads
+                   invalid result in UTF-16/32.
+                   fix in set_optimize_exact_info().
+
+2005/11/20: Version 3.9.0
+
+2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin].
+2005/11/20: [new]  add new config USE_CRNL_AS_LINE_TERMINATOR.
+                   (!!! NO SUPPORT experimental option !!!)
+2005/11/15: [bug]  (thanks Allan Odgaard)
+                   tok->escape was not cleared in fetch_token_in_cc().
+                   ex. [\s&&[^\n]] makes wrong result.
+2005/10/18: [impl] (thanks nobu)
+                   change sjis_mbc_enc_len()
+                   and node_new_cclass_by_codepoint_range() scope to static.
+2005/09/05: [dist] remove link to MultiFind.
+2005/09/01: [dist] add link to yagrep.
+
+2005/08/23: Version 3.8.9
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [inst] fix Makefile.in for make ctest/ptest.
+
+2005/08/23: Version 3.8.8
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc().
+2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at().
+2005/08/23: [impl] (thanks akr)
+                   add ONIG_OPTION_MAXBIT for escape conflict with
+                   Ruby's option.
+2005/08/22: [impl] escape GCC 4.0 warnings for testc.c.
+2005/08/22: [bug]  (thanks nobu, matz) [ruby-dev:26840]
+                   UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node().
+                   abort on /\S*/ =~ "\xfe"
+2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c.
+2005/08/22: [impl] fix testconvu.rb.
+2005/08/22: [impl] escape GCC 4.0 warnings.
+
+2005/08/09: Version 3.8.7
+
+2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/09: [bug]  (thanks Allan Odgaard)
+                   should not call enc_len() for s == range
+                   in onig_search().
+2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install.
+
+2005/07/27: Version 3.8.6
+
+2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux].
+2005/07/27: [impl] update onig-config.in.
+2005/07/26: [new]  (thanks Yen-Ju Chen)
+                   add Oniguruma configuration check program.
+                   (onig-config.in)
+
+2005/07/14: Version 3.8.5
+
+2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux].
+2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux].
+2005/07/11: [bug]  (thanks nobu) [ruby-dev:26505]
+                   invalid handling for /\c\x/ and /\C-\x/.
+                   fix fetch_escaped_value().
+2005/07/05: [impl] (thanks Alexey Zakhlestine)
+                   escape GCC 4.0 warnings.
+
+2005/07/01: Version 3.8.4
+
+2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux].
+2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/30: [dist] add GB 18030 test to sample/encode.c.
+2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head().
+2005/06/30: [new]  (contributed by KUBO Takehiro)
+                   add new character encoding ONIG_ENCODING_GB18030.
+2005/06/30: [bug]  invalid ctype check for multibyte encodings.
+                   ("graph", "print")
+                   fix onigenc_mb2/4_is_code_ctype(), 
+                   eucjp_is_code_ctype() and sjis_is_code_ctype().
+2005/06/30: [bug]  invalid conversion from code point to mbc in
+                   onigenc_mb4_code_to_mbc().
+
+2005/06/28: Version 3.8.3
+
+2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/06/27: [bug]  (thanks Wolfgang Nadasi-Donner)
+                   invalid check for never ending recursion.
+                   lower zero quantifier should be treated as
+                   a non-recursive call alternative.
+                   ex. /(?<bal>[^()]*(\(\g<bal>\)[^()]*)*)/
+2005/06/15: [impl] add divide_ambig_string_node_sub().
+2005/06/15: [dist] add a test to sample/encode.c.
+2005/06/10: [new]  add ONIG_SYNTAX_PERL_NG. (Perl + named group)
+
+2005/06/01: Version 3.8.2
+
+2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja.
+2005/05/31: [impl] minor change in node_new().
+2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/30: [bug]  (thanks Allan Odgaard)
+                   FreeNodeList null check should be on thread-atomic
+                   in node_new().
+
+2005/05/11: Version 3.8.1
+
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32].
+2005/05/11: [dist] update win32/Makefile (make 19).
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux].
+2005/05/06: [impl] (thanks nobu) [ruby-core:4815]
+                   add #ifdef USE_VARIABLE_META_CHARS to goto label.
+2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux].
+2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION
+                   to onig_rb_warn() and onig_rb_warning().
+
+2005/04/15: Version 3.8.0
+
+2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux].
+2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/04/01: [impl] (thanks Joe Orton)
+                   (thanks Moriyoshi Koizumi)
+                   many const-ification to many *.[ch] files.
+
+2005/03/25: Version 3.7.2
+
+2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/23: [new]  add ONIG_SYNTAX_ASIS.
+2005/03/23: [new]  add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
+2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API)
+2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/08: [impl] (thanks matz) [ruby-dev:25783]
+                   should not allocate memory for key data in st.c.
+                   move st_*_strend() functions from st.c. fixed some
+                   potential memory leaks.
+                   (imported from Ruby 1.9 2005-03-08)
+
 2005/03/07: Version 3.7.1
 
 2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux].
@@ -24,7 +476,7 @@ History
                    remove reggnu.c from make 19.
 2005/02/19: [dist] update doc/API and doc/API.ja.
 2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
-2005/02/19: [impl] (thanks Alexey Zakhlestin)
+2005/02/19: [impl] (thanks Alexey Zakhlestine)
                    change UChar* to const UChar* in oniguruma.h,
                    regenc.h and regparse.h.
 2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and
@@ -1358,16 +1810,29 @@ History
 [test: test]
 [memo: memo]
 --
-<branch>
-svn mkdir http://localhost/repos/branches -m ""
-svn mkdir http://localhost/repos/branches/oniguruma -m ""
-svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only"
-
-<create tag>
-svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
-
-<show all tags>
+<CVS: show all tags>
 cvs history -T
 
-<add tag>
+<CVS: add tag>
 cvs rtag "VERSION_X_X_X" oniguruma
+
+
+<GNU Autotools: bootstrap>
+* write Makefile.am and configure.in.
+> aclocal
+> libtoolize
+> automake --foreign --add-missing
+> autoconf
+> configure --with-rubydir=... CFLAGS="-O2 -Wall"
+
+
+<GNU libtool: version management>
+
+  VERSION = current:revision:age
+
+  current:  interface number (from 0)
+  revision: implementation number of same interface (from 0)
+  age:      number of supported previous interfaces
+            (if current only supported then age == 0)
+
+//END
index dc4fb3b64b838aec32368bf6dc09717ac1fb7b7a..dff7fba5622517bc08ba0e48f0f245d4a9f1ac23 100644 (file)
@@ -1,9 +1,8 @@
-README  2005/02/04
+README  2007/06/18
 
 Oniguruma  ----   (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 
 http://www.geocities.jp/kosako3/oniguruma/
-http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
 http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 Oniguruma is a regular expressions library.
@@ -14,11 +13,12 @@ Supported character encodings:
 
   ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
   EUC-JP, EUC-TW, EUC-KR, EUC-CN,
-  Shift_JIS, Big5, KOI8-R, KOI8 (*),
+  Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
   ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
   ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
   ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
 
+* GB 18030: contributed by KUBO Takehiro
 * KOI8 is not included in library archive by default setup.
   (need to edit Makefile if you want to use it.)
 ------------------------------------------------------------
@@ -31,15 +31,20 @@ Install
    2. make
    3. make install
 
-     library file: libonig.a
+   * uninstall
 
-   test (ASCII/EUC-JP)
+     make uninstall
 
-     make ctest
+   * test (ASCII/EUC-JP)
 
-   uninstall
+     make atest
 
-     make uninstall
+   * configuration check
+
+     onig-config --cflags
+     onig-config --libs
+     onig-config --prefix
+     onig-config --exec-prefix
 
 
 
@@ -73,8 +78,21 @@ Regular Expressions
 
 Usage
 
-  Include oniguruma.h in your program. (native API)
-  See doc/API for native API.
+  Include oniguruma.h in your program. (Oniguruma API)
+  See doc/API for Oniguruma API.
+
+  If you want to disable UChar type (== unsigned char) definition
+  in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then 
+  include oniguruma.h.
+
+  If you want to disable regex_t type definition in oniguruma.h,
+  define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
+
+  Example of the compiling/linking command line in Unix or Cygwin,
+  (prefix == /usr/local case)
+
+    cc sample.c -L/usr/local/lib -lonig
+
 
   If you want to use static link library(onig_s.lib) in Win32,
   add option -DONIG_EXTERN=extern to C compiler.
@@ -83,19 +101,20 @@ Usage
 
 Sample Programs
 
-  sample/simple.c    example of the minimum (native API)
+  sample/simple.c    example of the minimum (Oniguruma API)
   sample/names.c     example of the named group callback.
   sample/encode.c    example of some encodings.
   sample/listcap.c   example of the capture history.
   sample/posix.c     POSIX API sample.
   sample/sql.c       example of the variable meta characters.
                      (SQL-like pattern matching)
-  sample/syntax.c    Perl and Java syntax test.
+  sample/syntax.c    Perl, Java and ASIS syntax test.
 
 
 Source Files
 
   oniguruma.h        Oniguruma API header file. (public)
+  onig-config.in     configuration check program template.
 
   regenc.h           character encodings framework header file.
   regint.h           internal definitions
@@ -125,9 +144,10 @@ Source Files
   enc/euc_tw.c       EUC-TW encoding.
   enc/euc_kr.c       EUC-KR, EUC-CN encoding.
   enc/sjis.c         Shift_JIS encoding.
-  enc/big5.c         Big5   encoding.
-  enc/koi8.c         KOI8   encoding.
-  enc/koi8_r.c       KOI8-R encoding.
+  enc/big5.c         Big5      encoding.
+  enc/gb18030.c      GB 18030  encoding  (contributed by KUBO Takehiro)
+  enc/koi8.c         KOI8      encoding.
+  enc/koi8_r.c       KOI8-R    encoding.
   enc/iso8859_1.c    ISO-8859-1  encoding. (Latin-1)
   enc/iso8859_2.c    ISO-8859-2  encoding. (Latin-2)
   enc/iso8859_3.c    ISO-8859-3  encoding. (Latin-3)
@@ -159,23 +179,11 @@ Source Files
 API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
 
    + re_compile_fastmap() is removed.
-   + re_recompile_pattern() is added.
    + re_alloc_pattern() is added.
 
 
-ToDo
-
-  ? ignore case in full code point range of Unicode.
-  ? Unicode Property.
-  ? ambig-flag Katakana <-> Hiragana.
-  ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
-  ? add ONIG_SYNTAX_ASIS.
- ?? \X (== \PM\pM*)
- ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
- ?? variable line separator.
- ?? transmission stopper. (return ONIG_STOP from match_at())
 
-and I'm thankful to Akinori MUSHA.
+I'm thankful to Akinori MUSHA.
 
 
 Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
index 44553abfefd6ec39d9c80f408922b250921bd306..2dee793caed8f0ff05fb460f78b5f0123d65ba8e 100644 (file)
@@ -1,9 +1,8 @@
-README.ja  2005/02/04
+README.ja  2007/06/18
 
 鬼車  ----   (C) K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 
 http://www.geocities.jp/kosako3/oniguruma/
-http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
 http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 鬼車は正規表現ライブラリである。
@@ -14,11 +13,12 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
   ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
   EUC-JP, EUC-TW, EUC-KR, EUC-CN,
-  Shift_JIS, Big5, KOI8-R, KOI8 (*),
+  Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
   ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
   ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
   ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
 
+* GB 18030: µ×ÊÝ·òÍλáÄó¶¡
 * KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£
   (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È)
 ------------------------------------------------------------
@@ -31,15 +31,21 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
    2. make
    3. make install
 
-    ¥é¥¤¥Ö¥é¥ê¥Õ¥¡¥¤¥ë: libonig.a
+   ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë
+
+     make uninstall
 
    Æ°ºî¥Æ¥¹¥È (ASCII/EUC-JP)
 
-     make ctest
+     make atest
 
-   ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë
 
-     make uninstall
+   ¹½À®³Îǧ
+
+     onig-config --cflags
+     onig-config --libs
+     onig-config --prefix
+     onig-config --exec-prefix
 
 
 
@@ -71,8 +77,28 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 »ÈÍÑÊýË¡
 
-  »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Native API¤Î¾ì¹ç)¡£
-  Native API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+  »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£
+  Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+
+  oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç
+  ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+  ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬
+  Í­¸ú¤Ë¤Ê¤ë¡£
+
+  oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢
+  ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+  ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦
+  Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í­¸ú¤Ë¤Ê¤ë¡£
+
+  Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎã¡§
+  (prefix¤¬/usr/local¤Î¤È¤­)
+  cc sample.c -L/usr/local/lib -lonig
+
+  GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ
+  ¤¤¤ì¤Ð¡¢»ÈÍѤǤ­¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£
+  ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î
+  ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£
+
 
   Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢
   ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤­¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£
@@ -80,18 +106,19 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 
 »ÈÍÑÎã¥×¥í¥°¥é¥à
 
-  sample/simple.c    ºÇ¾®Îã (native API)
+  sample/simple.c    ºÇ¾®Îã (Oniguruma API)
   sample/names.c     Ì¾Á°ÉÕ¤­¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã
   sample/encode.c    ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã
   sample/listcap.c   Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã
   sample/posix.c     POSIX API»ÈÍÑÎã
   sample/sql.c       ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó)
-  sample/syntax.c    Perl¤ÈJavaʸˡ¤Î¥Æ¥¹¥È
+  sample/syntax.c    Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È
 
 
 ¥½¡¼¥¹¥Õ¥¡¥¤¥ë
 
   oniguruma.h        µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«)
+  onig-config.in     onig-config¥×¥í¥°¥é¥à ¥Æ¥ó¥×¥ì¡¼¥È
 
   regenc.h           Ê¸»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ
   regint.h           ÆâÉôÀë¸À
@@ -122,6 +149,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
   enc/euc_kr.c       EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/sjis.c         Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/big5.c         Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+  enc/gb18030.c      GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡)
   enc/koi8.c         KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/koi8_r.c       KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
   enc/iso8859_1.c    ISO-8859-1  (Latin-1)
@@ -155,23 +183,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
 Ruby 1.8/1.6の日本語化GNU regexとのAPIの違い
 
    + re_compile_fastmap()   は削除された。
-   + re_recompile_pattern() が追加された。
    + re_alloc_pattern()     が追加された。
 
 
-残件
-
-  ? Unicode全コードポイント領域での大文字小文字照合
-  ? Unicodeプロパティ
-  ? ambig-flag Katakana <-> Hiragana
-  ? ONIG_OPTION_NOTBOS/NOTEOS追加 (\A, \z, \Z)
-  ? ONIG_SYNTAX_ASIS追加
- ?? \X (== \PM\pM*)
- ?? 文法要素 ONIG_SYN_CONTEXT_INDEP_ANCHORSの実装
- ?? 改行文字(文字列)を変更できる
- ?? 検索位置移動停止演算子 (match_at()からONIG_STOPを返す)
-
-and I'm thankful to Akinori MUSHA.
+I'm thankful to Akinori MUSHA.
 
 
-Mail Address: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+アドレス: K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
index 5ca2056fb396af7334f4a3ed8b17305b99affca5..4a2fc28d8234dd08b284d6e58d3ca43f1112b7ca 100644 (file)
-/* config.h.in.  Generated automatically from configure.in by autoheader.  */
+/* config.h.in.  Generated from configure.in by autoheader.  */
 
-/* Define if using alloca.c.  */
-#undef C_ALLOCA
-
-/* Define to empty if the keyword does not work.  */
-#undef const
-
-/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
-   This function is required for alloca.c support on those systems.  */
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+   systems. This function is required for `alloca.c' support on those systems.
+   */
 #undef CRAY_STACKSEG_END
 
-/* Define if you have alloca, as a function or macro.  */
+/* Define to 1 if using `alloca.c'. */
+#undef C_ALLOCA
+
+/* Define to 1 if you have `alloca', as a function or macro. */
 #undef HAVE_ALLOCA
 
-/* Define if you have <alloca.h> and it should be used (not on Ultrix).  */
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+   */
 #undef HAVE_ALLOCA_H
 
-/* If using the C implementation of alloca, define if you know the
-   direction of stack growth for your system; otherwise it will be
-   automatically deduced at run-time.
- STACK_DIRECTION > 0 => grows toward higher addresses
- STACK_DIRECTION < 0 => grows toward lower addresses
- STACK_DIRECTION = 0 => direction of growth unknown
- */
-#undef STACK_DIRECTION
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
 
-/* Define if you have the ANSI C header files.  */
-#undef STDC_HEADERS
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
 
-/* Define if you can safely include both <sys/time.h> and <time.h>.  */
-#undef TIME_WITH_SYS_TIME
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
 
-/* The number of bytes in a int.  */
-#undef SIZEOF_INT
+/* Define if compiler supports prototypes */
+#undef HAVE_PROTOTYPES
 
-/* The number of bytes in a long.  */
-#undef SIZEOF_LONG
+/* Define if compiler supports stdarg prototypes */
+#undef HAVE_STDARG_PROTOTYPES
 
-/* The number of bytes in a short.  */
-#undef SIZEOF_SHORT
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
 
-/* Define if you have the <stdlib.h> header file.  */
+/* Define to 1 if you have the <stdlib.h> header file. */
 #undef HAVE_STDLIB_H
 
-/* Define if you have the <string.h> header file.  */
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
 #undef HAVE_STRING_H
 
-/* Define if you have the <strings.h> header file.  */
-#undef HAVE_STRINGS_H
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
 
-/* Define if you have the <sys/types.h> header file.  */
-#undef HAVE_SYS_TYPES_H
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
 
-/* Define if you have the <sys/time.h> header file.  */
+/* Define to 1 if you have the <sys/time.h> header file. */
 #undef HAVE_SYS_TIME_H
 
-/* Define if you have the <sys/times.h> header file.  */
-#undef HAVE_SYS_TIMES_H
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
 
-/* Define if you have the <unistd.h> header file.  */
+/* Define to 1 if you have the <unistd.h> header file. */
 #undef HAVE_UNISTD_H
 
-/* Define if you have the function argument prototype */
-#undef HAVE_PROTOTYPES
+/* Name of package */
+#undef PACKAGE
 
-/* Define if you have the variable length prototypes and stdarg.h */
-#undef HAVE_STDARG_PROTOTYPES
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `int', as computed by sizeof. */
+#undef SIZEOF_INT
+
+/* The size of a `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
+/* The size of a `short', as computed by sizeof. */
+#undef SIZEOF_SHORT
 
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at run-time.
+       STACK_DIRECTION > 0 => grows toward higher addresses
+       STACK_DIRECTION < 0 => grows toward lower addresses
+       STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define if combination explosion check */
+#undef USE_COMBINATION_EXPLOSION_CHECK
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
index 763872e963a004f8d47de8b299fdafd0c7cde3c2..86792666a4b0776c973f663941f23d6a32c93240 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-static int EncLen_BIG5[] = {
+static const int EncLen_BIG5[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
index 5f13e33eb4c0c663d2710b824b660a15fa885c9f..71c81ee9fe833e241be47d01e8d5e749d742c05e 100644 (file)
@@ -31,7 +31,7 @@
 
 #define eucjp_islead(c)    ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
 
-static int EncLen_EUCJP[] = {
+static const int EncLen_EUCJP[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -158,20 +158,16 @@ eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 static int
 eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
+    }
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 static UChar*
index c1e83b7e66024753d9e7cb881681fd0efd7a6dba..57bf801536cb1e8b814c7463954ed630a092d793 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-static int EncLen_EUCKR[] = {
+static const int EncLen_EUCKR[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
index 4e5851a45158dc93241b9e5f1b8528ff299597ac..6f396e75e6a588676d5e7f70d126ce1c43242cc9 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-static int EncLen_EUCTW[] = {
+static const int EncLen_EUCTW[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
index 53ad52ee13d6b35af6bd9980419e6e3eb0e35fd4..5646f26c102637e01bb3b7aae2e8e81d05d5778a 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_1.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
   ((EncISO_8859_1_CtypeTable[code] & ctype) != 0)
 
-static unsigned short EncISO_8859_1_CtypeTable[256] = {
+static const unsigned short EncISO_8859_1_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -72,16 +72,6 @@ iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* e
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -101,22 +91,6 @@ iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -153,8 +127,7 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index a9331cebf35eb729c9aa316f9b9e8e348e4ffef4..8081ef8010f98c72f1954038c4aecc5f82272221 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_10.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
   ((EncISO_8859_10_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_10_CtypeTable[256] = {
+static const unsigned short EncISO_8859_10_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xa1, 0xb1 },
     { 0xa2, 0xb2 },
     { 0xa3, 0xb3 },
@@ -302,8 +276,7 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index bb1098807ac2d67ce7b82f43886c033649e41cc6..de9bb3b8257168595f2d9cb193ef3ce0e60a9a40 100644 (file)
 #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
   ((EncISO_8859_11_CtypeTable[code] & ctype) != 0)
 
-static unsigned short EncISO_8859_11_CtypeTable[256] = {
+static const unsigned short EncISO_8859_11_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
index 827ca508e8b92f3f4090448b0de8cde8bab07bdd..69316edfc351b45b1684e318e99a10fd6590b863 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_13.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
   ((EncISO_8859_13_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_13_CtypeTable[256] = {
+static const unsigned short EncISO_8859_13_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_13_CtypeTable[256] = {
 };
 
 static int
-iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+                const UChar** pp, const UChar* end, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
 }
 
 static int
-iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
 }
 
 static int
-iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
@@ -185,74 +158,73 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 }
 
 static int
-iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                     OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
 {
- static OnigPairAmbigCodes cc[] = {
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
+static const OnigPairAmbigCodes cc[] = {
+  { 0xc0, 0xe0 },
+  { 0xc1, 0xe1 },
+  { 0xc2, 0xe2 },
+  { 0xc3, 0xe3 },
+  { 0xc4, 0xe4 },
+  { 0xc5, 0xe5 },
+  { 0xc6, 0xe6 },
+  { 0xc7, 0xe7 },
+  { 0xc8, 0xe8 },
+  { 0xc9, 0xe9 },
+  { 0xca, 0xea },
+  { 0xcb, 0xeb },
+  { 0xcc, 0xec },
+  { 0xcd, 0xed },
+  { 0xce, 0xee },
+  { 0xcf, 0xef },
 
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
+  { 0xd0, 0xf0 },
+  { 0xd1, 0xf1 },
+  { 0xd2, 0xf2 },
+  { 0xd3, 0xf3 },
+  { 0xd4, 0xf4 },
+  { 0xd5, 0xf5 },
+  { 0xd6, 0xf6 },
+  { 0xd8, 0xf8 },
+  { 0xd9, 0xf9 },
+  { 0xda, 0xfa },
+  { 0xdb, 0xfb },
+  { 0xdc, 0xfc },
+  { 0xdd, 0xfd },
+  { 0xde, 0xfe },
 
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
+  { 0xe0, 0xc0 },
+  { 0xe1, 0xc1 },
+  { 0xe2, 0xc2 },
+  { 0xe3, 0xc3 },
+  { 0xe4, 0xc4 },
+  { 0xe5, 0xc5 },
+  { 0xe6, 0xc6 },
+  { 0xe7, 0xc7 },
+  { 0xe8, 0xc8 },
+  { 0xe9, 0xc9 },
+  { 0xea, 0xca },
+  { 0xeb, 0xcb },
+  { 0xec, 0xcc },
+  { 0xed, 0xcd },
+  { 0xee, 0xce },
+  { 0xef, 0xcf },
 
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde }
- };
+  { 0xf0, 0xd0 },
+  { 0xf1, 0xd1 },
+  { 0xf2, 0xd2 },
+  { 0xf3, 0xd3 },
+  { 0xf4, 0xd4 },
+  { 0xf5, 0xd5 },
+  { 0xf6, 0xd6 },
+  { 0xf8, 0xd8 },
+  { 0xf9, 0xd9 },
+  { 0xfa, 0xda },
+  { 0xfb, 0xdb },
+  { 0xfc, 0xdc },
+  { 0xfd, 0xdd },
+  { 0xfe, 0xde }
+ };
 
   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
     *ccs = OnigAsciiPairAmbigCodes;
@@ -272,8 +244,7 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
@@ -286,11 +257,11 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_13_mbc_to_normalize,
-  iso_8859_13_is_mbc_ambiguous,
-  iso_8859_13_get_all_pair_ambig_codes,
+  mbc_to_normalize,
+  is_mbc_ambiguous,
+  get_all_pair_ambig_codes,
   onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_13_is_code_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index 4fe5ab29d1ef039fdef9783ed897a7e9d071c4d2..44638cf13a50f5e8711e5786ccf13e14b739aad5 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_14.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
   ((EncISO_8859_14_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_14_CtypeTable[256] = {
+static const unsigned short EncISO_8859_14_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_14_CtypeTable[256] = {
 };
 
 static int
-iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
-                             const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+                const UChar** pp, const UChar* end, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
 }
 
 static int
-iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
 }
 
 static int
-iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
@@ -185,103 +158,102 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 }
 
 static int
-iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                     OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xa2 },
-    { 0xa2, 0xa1 },
-    { 0xa4, 0xa5 },
-    { 0xa5, 0xa4 },
-    { 0xa6, 0xab },
-    { 0xa8, 0xb8 },
-    { 0xaa, 0xba },
-    { 0xab, 0xa6 },
-    { 0xac, 0xbc },
-    { 0xaf, 0xff },
+  static const OnigPairAmbigCodes cc[] = {
+   { 0xa1, 0xa2 },
+   { 0xa2, 0xa1 },
+   { 0xa4, 0xa5 },
+   { 0xa5, 0xa4 },
+   { 0xa6, 0xab },
+   { 0xa8, 0xb8 },
+   { 0xaa, 0xba },
+   { 0xab, 0xa6 },
+   { 0xac, 0xbc },
+   { 0xaf, 0xff },
 
-    { 0xb0, 0xb1 },
-    { 0xb1, 0xb0 },
-    { 0xb2, 0xb3 },
-    { 0xb3, 0xb2 },
-    { 0xb4, 0xb5 },
-    { 0xb5, 0xb4 },
-    { 0xb7, 0xb9 },
-    { 0xb8, 0xa8 },
-    { 0xb9, 0xb7 },
-    { 0xba, 0xaa },
-    { 0xbb, 0xbf },
-    { 0xbc, 0xac },
-    { 0xbd, 0xbe },
-    { 0xbe, 0xbd },
-    { 0xbf, 0xbb },
+   { 0xb0, 0xb1 },
+   { 0xb1, 0xb0 },
+   { 0xb2, 0xb3 },
+   { 0xb3, 0xb2 },
+   { 0xb4, 0xb5 },
+   { 0xb5, 0xb4 },
+   { 0xb7, 0xb9 },
+   { 0xb8, 0xa8 },
+   { 0xb9, 0xb7 },
+   { 0xba, 0xaa },
+   { 0xbb, 0xbf },
+   { 0xbc, 0xac },
+   { 0xbd, 0xbe },
+   { 0xbe, 0xbd },
+   { 0xbf, 0xbb },
 
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
+   { 0xc0, 0xe0 },
+   { 0xc1, 0xe1 },
+   { 0xc2, 0xe2 },
+   { 0xc3, 0xe3 },
+   { 0xc4, 0xe4 },
+   { 0xc5, 0xe5 },
+   { 0xc6, 0xe6 },
+   { 0xc7, 0xe7 },
+   { 0xc8, 0xe8 },
+   { 0xc9, 0xe9 },
+   { 0xca, 0xea },
+   { 0xcb, 0xeb },
+   { 0xcc, 0xec },
+   { 0xcd, 0xed },
+   { 0xce, 0xee },
+   { 0xcf, 0xef },
 
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd7, 0xf7 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
+   { 0xd0, 0xf0 },
+   { 0xd1, 0xf1 },
+   { 0xd2, 0xf2 },
+   { 0xd3, 0xf3 },
+   { 0xd4, 0xf4 },
+   { 0xd5, 0xf5 },
+   { 0xd6, 0xf6 },
+   { 0xd7, 0xf7 },
+   { 0xd8, 0xf8 },
+   { 0xd9, 0xf9 },
+   { 0xda, 0xfa },
+   { 0xdb, 0xfb },
+   { 0xdc, 0xfc },
+   { 0xdd, 0xfd },
+   { 0xde, 0xfe },
 
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
+   { 0xe0, 0xc0 },
+   { 0xe1, 0xc1 },
+   { 0xe2, 0xc2 },
+   { 0xe3, 0xc3 },
+   { 0xe4, 0xc4 },
+   { 0xe5, 0xc5 },
+   { 0xe6, 0xc6 },
+   { 0xe7, 0xc7 },
+   { 0xe8, 0xc8 },
+   { 0xe9, 0xc9 },
+   { 0xea, 0xca },
+   { 0xeb, 0xcb },
+   { 0xec, 0xcc },
+   { 0xed, 0xcd },
+   { 0xee, 0xce },
+   { 0xef, 0xcf },
 
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf7, 0xd7 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde },
-    { 0xff, 0xaf }
+   { 0xf0, 0xd0 },
+   { 0xf1, 0xd1 },
+   { 0xf2, 0xd2 },
+   { 0xf3, 0xd3 },
+   { 0xf4, 0xd4 },
+   { 0xf5, 0xd5 },
+   { 0xf6, 0xd6 },
+   { 0xf7, 0xd7 },
+   { 0xf8, 0xd8 },
+   { 0xf9, 0xd9 },
+   { 0xfa, 0xda },
+   { 0xfb, 0xdb },
+   { 0xfc, 0xdc },
+   { 0xfd, 0xdd },
+   { 0xfe, 0xde },
+   { 0xff, 0xaf }
   };
 
   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
@@ -302,8 +274,7 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
@@ -316,11 +287,11 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_14_mbc_to_normalize,
-  iso_8859_14_is_mbc_ambiguous,
-  iso_8859_14_get_all_pair_ambig_codes,
+  mbc_to_normalize,
+  is_mbc_ambiguous,
+  get_all_pair_ambig_codes,
   onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_14_is_code_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index 1a8bd7b4c5bc8c4388bff571296483da609f990c..f643b895df2d050192a158243d067e67dbc9c61d 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_15.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
   ((EncISO_8859_15_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_15_CtypeTable[256] = {
+static const unsigned short EncISO_8859_15_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_15_CtypeTable[256] = {
 };
 
 static int
-iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+                const UChar** pp, const UChar* end, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
 }
 
 static int
-iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
 }
 
 static int
-iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
@@ -185,10 +158,10 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 }
 
 static int
-iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                     OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag,
+                        const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xa6, 0xa8 },
     { 0xa8, 0xa6 },
 
@@ -282,8 +255,7 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
@@ -296,11 +268,11 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_15_mbc_to_normalize,
-  iso_8859_15_is_mbc_ambiguous,
-  iso_8859_15_get_all_pair_ambig_codes,
+  mbc_to_normalize,
+  is_mbc_ambiguous,
+  get_all_pair_ambig_codes,
   onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_15_is_code_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index e283db17ccf600db904bd46856321845fdce373b..921ae36d9d22a2061dea4ab5058d995591eb00e6 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_16.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
   ((EncISO_8859_16_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_16_CtypeTable[256] = {
+static const unsigned short EncISO_8859_16_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_16_CtypeTable[256] = {
 };
 
 static int
-iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+                const UChar** pp, const UChar* end, UChar* lower)
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
 }
 
 static int
-iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
 }
 
 static int
-iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
   if (code < 256)
     return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
@@ -185,97 +158,96 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 }
 
 static int
-iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                     OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
-    { 0xa1, 0xa2 },
-    { 0xa2, 0xa1 },
-    { 0xa3, 0xb3 },
-    { 0xa6, 0xa8 },
-    { 0xa8, 0xa6 },
-    { 0xaa, 0xba },
-    { 0xac, 0xae },
-    { 0xae, 0xac },
-    { 0xaf, 0xbf },
+  static const OnigPairAmbigCodes cc[] = {
+   { 0xa1, 0xa2 },
+   { 0xa2, 0xa1 },
+   { 0xa3, 0xb3 },
+   { 0xa6, 0xa8 },
+   { 0xa8, 0xa6 },
+   { 0xaa, 0xba },
+   { 0xac, 0xae },
+   { 0xae, 0xac },
+   { 0xaf, 0xbf },
 
-    { 0xb2, 0xb9 },
-    { 0xb3, 0xa3 },
-    { 0xb4, 0xb8 },
-    { 0xb8, 0xb4 },
-    { 0xb9, 0xb2 },
-    { 0xba, 0xaa },
-    { 0xbc, 0xbd },
-    { 0xbd, 0xbc },
-    { 0xbe, 0xff },
-    { 0xbf, 0xaf },
+   { 0xb2, 0xb9 },
+   { 0xb3, 0xa3 },
+   { 0xb4, 0xb8 },
+   { 0xb8, 0xb4 },
+   { 0xb9, 0xb2 },
+   { 0xba, 0xaa },
+   { 0xbc, 0xbd },
+   { 0xbd, 0xbc },
+   { 0xbe, 0xff },
+   { 0xbf, 0xaf },
 
-    { 0xc0, 0xe0 },
-    { 0xc1, 0xe1 },
-    { 0xc2, 0xe2 },
-    { 0xc3, 0xe3 },
-    { 0xc4, 0xe4 },
-    { 0xc5, 0xe5 },
-    { 0xc6, 0xe6 },
-    { 0xc7, 0xe7 },
-    { 0xc8, 0xe8 },
-    { 0xc9, 0xe9 },
-    { 0xca, 0xea },
-    { 0xcb, 0xeb },
-    { 0xcc, 0xec },
-    { 0xcd, 0xed },
-    { 0xce, 0xee },
-    { 0xcf, 0xef },
+   { 0xc0, 0xe0 },
+   { 0xc1, 0xe1 },
+   { 0xc2, 0xe2 },
+   { 0xc3, 0xe3 },
+   { 0xc4, 0xe4 },
+   { 0xc5, 0xe5 },
+   { 0xc6, 0xe6 },
+   { 0xc7, 0xe7 },
+   { 0xc8, 0xe8 },
+   { 0xc9, 0xe9 },
+   { 0xca, 0xea },
+   { 0xcb, 0xeb },
+   { 0xcc, 0xec },
+   { 0xcd, 0xed },
+   { 0xce, 0xee },
+   { 0xcf, 0xef },
 
-    { 0xd0, 0xf0 },
-    { 0xd1, 0xf1 },
-    { 0xd2, 0xf2 },
-    { 0xd3, 0xf3 },
-    { 0xd4, 0xf4 },
-    { 0xd5, 0xf5 },
-    { 0xd6, 0xf6 },
-    { 0xd7, 0xf7 },
-    { 0xd8, 0xf8 },
-    { 0xd9, 0xf9 },
-    { 0xda, 0xfa },
-    { 0xdb, 0xfb },
-    { 0xdc, 0xfc },
-    { 0xdd, 0xfd },
-    { 0xde, 0xfe },
+   { 0xd0, 0xf0 },
+   { 0xd1, 0xf1 },
+   { 0xd2, 0xf2 },
+   { 0xd3, 0xf3 },
+   { 0xd4, 0xf4 },
+   { 0xd5, 0xf5 },
+   { 0xd6, 0xf6 },
+   { 0xd7, 0xf7 },
+   { 0xd8, 0xf8 },
+   { 0xd9, 0xf9 },
+   { 0xda, 0xfa },
+   { 0xdb, 0xfb },
+   { 0xdc, 0xfc },
+   { 0xdd, 0xfd },
+   { 0xde, 0xfe },
 
-    { 0xe0, 0xc0 },
-    { 0xe1, 0xc1 },
-    { 0xe2, 0xc2 },
-    { 0xe3, 0xc3 },
-    { 0xe4, 0xc4 },
-    { 0xe5, 0xc5 },
-    { 0xe6, 0xc6 },
-    { 0xe7, 0xc7 },
-    { 0xe8, 0xc8 },
-    { 0xe9, 0xc9 },
-    { 0xea, 0xca },
-    { 0xeb, 0xcb },
-    { 0xec, 0xcc },
-    { 0xed, 0xcd },
-    { 0xee, 0xce },
-    { 0xef, 0xcf },
+   { 0xe0, 0xc0 },
+   { 0xe1, 0xc1 },
+   { 0xe2, 0xc2 },
+   { 0xe3, 0xc3 },
+   { 0xe4, 0xc4 },
+   { 0xe5, 0xc5 },
+   { 0xe6, 0xc6 },
+   { 0xe7, 0xc7 },
+   { 0xe8, 0xc8 },
+   { 0xe9, 0xc9 },
+   { 0xea, 0xca },
+   { 0xeb, 0xcb },
+   { 0xec, 0xcc },
+   { 0xed, 0xcd },
+   { 0xee, 0xce },
+   { 0xef, 0xcf },
 
-    { 0xf0, 0xd0 },
-    { 0xf1, 0xd1 },
-    { 0xf2, 0xd2 },
-    { 0xf3, 0xd3 },
-    { 0xf4, 0xd4 },
-    { 0xf5, 0xd5 },
-    { 0xf6, 0xd6 },
-    { 0xf7, 0xd7 },
-    { 0xf8, 0xd8 },
-    { 0xf9, 0xd9 },
-    { 0xfa, 0xda },
-    { 0xfb, 0xdb },
-    { 0xfc, 0xdc },
-    { 0xfd, 0xdd },
-    { 0xfe, 0xde },
-    { 0xff, 0xbe }
+   { 0xf0, 0xd0 },
+   { 0xf1, 0xd1 },
+   { 0xf2, 0xd2 },
+   { 0xf3, 0xd3 },
+   { 0xf4, 0xd4 },
+   { 0xf5, 0xd5 },
+   { 0xf6, 0xd6 },
+   { 0xf7, 0xd7 },
+   { 0xf8, 0xd8 },
+   { 0xf9, 0xd9 },
+   { 0xfa, 0xda },
+   { 0xfb, 0xdb },
+   { 0xfc, 0xdc },
+   { 0xfd, 0xdd },
+   { 0xfe, 0xde },
+   { 0xff, 0xbe }
   };
 
   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
@@ -296,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
@@ -310,11 +281,11 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
   onigenc_single_byte_mbc_to_code,
   onigenc_single_byte_code_to_mbclen,
   onigenc_single_byte_code_to_mbc,
-  iso_8859_16_mbc_to_normalize,
-  iso_8859_16_is_mbc_ambiguous,
-  iso_8859_16_get_all_pair_ambig_codes,
+  mbc_to_normalize,
+  is_mbc_ambiguous,
+  get_all_pair_ambig_codes,
   onigenc_ess_tsett_get_all_comp_ambig_codes,
-  iso_8859_16_is_code_ctype,
+  is_code_ctype,
   onigenc_not_support_get_ctype_code_range,
   onigenc_single_byte_left_adjust_char_head,
   onigenc_always_true_is_allowed_reverse_match
index e86415b9c9631f21acd52d83010d5cb23690f347..f8cb3756f2849a630d26e4ac055e22d7f7d43a03 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_2.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
   ((EncISO_8859_2_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_2_CtypeTable[256] = {
+static const unsigned short EncISO_8859_2_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -177,9 +151,9 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
 
 static int
 iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xa1, 0xb1 },
     { 0xa3, 0xb3 },
     { 0xa5, 0xb5 },
@@ -294,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index 76d2bec8a8766881b6d8e5e024d43547d7ba35e4..e62d20de7b14a97c3af4950644481dffe3e59aad 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_3.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
   ((EncISO_8859_3_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_3_CtypeTable[256] = {
+static const unsigned short EncISO_8859_3_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xa1, 0xb1 },
     { 0xa6, 0xb6 },
     { 0xa9, 0xb9 },
@@ -283,8 +257,7 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index 7569006725c7fc374c692cf6faf7346a15cae527..dd6bd7dfe35155a98c664bb0c544e54e6b8dc99f 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_4.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
   ((EncISO_8859_4_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_4_CtypeTable[256] = {
+static const unsigned short EncISO_8859_4_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xa1, 0xb1 },
     { 0xa3, 0xb3 },
     { 0xa5, 0xb5 },
@@ -292,8 +266,7 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index 2f7677b3e7a9dee8aa7a2def79ec5ffc123d4a99..87b7fb8a29de6b268fed0c6811f08897a665ca2b 100644 (file)
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
   ((EncISO_8859_5_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_5_CtypeTable[256] = {
+static const unsigned short EncISO_8859_5_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -152,9 +152,9 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xa1, 0xf1 },
     { 0xa2, 0xf2 },
     { 0xa3, 0xf3 },
index 0fcb9e8b8363318aa6800f28f823c66ff1333e9d..fffcd0e7d19d1ea230b9fac5cbd28495b426b475 100644 (file)
 #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
   ((EncISO_8859_6_CtypeTable[code] & ctype) != 0)
 
-static unsigned short EncISO_8859_6_CtypeTable[256] = {
+static const unsigned short EncISO_8859_6_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
index 8b2cb9ec592101940322ddd641256e89875f06a4..e87661d84bb5ef6243eddf882ebc635233545836 100644 (file)
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
   ((EncISO_8859_7_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_7_CtypeTable[256] = {
+static const unsigned short EncISO_8859_7_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -159,9 +159,9 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xb6, 0xdc },
     { 0xb8, 0xdd },
     { 0xb9, 0xde },
index 3c95b9b1375e518f330fb83de81cd507e9d201cd..e76966c667f63b88be02941fd01a984c65cee995 100644 (file)
 #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
   ((EncISO_8859_8_CtypeTable[code] & ctype) != 0)
 
-static unsigned short EncISO_8859_8_CtypeTable[256] = {
+static const unsigned short EncISO_8859_8_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
index 1b061ff6ea799c8afd2c17b6ce0ab61cec0d467d..16a30c5f2479d0c1abc74a23e8b85d01198c717c 100644 (file)
@@ -2,7 +2,7 @@
   iso8859_9.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
   ((EncISO_8859_9_CtypeTable[code] & ctype) != 0)
 
-static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
   '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
 };
 
-static unsigned short EncISO_8859_9_CtypeTable[256] = {
+static const unsigned short EncISO_8859_9_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if ((*p == 's' && *(p+1) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+1) == 'S'))) {
-      *lower = 0xdf;
-      (*pp) += 2;
-      return 1;
-    }
-  }
-
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
       ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
 {
   const UChar* p = *pp;
 
-  if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-    if (end > p + 1) {
-      if ((*p == 's' && *(p+1) == 's') ||
-         ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-          (*p == 'S' && *(p+1) == 'S'))) {
-       (*pp) += 2;
-       return TRUE;
-      }
-    }
-
-    if (*p == 0xdf) {
-      (*pp)++;
-      return TRUE;
-    }
-  }
-
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                    OnigPairAmbigCodes** ccs)
+                                    const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xc0, 0xe0 },
     { 0xc1, 0xe1 },
     { 0xc2, 0xe2 },
@@ -272,8 +246,7 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
   1,             /* max enc length */
   1,             /* min enc length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index f8a5a1da617fdb0715b7b24d6720a33c6c9ade3b..d7277e862e978e4fb3c157894b2901ad258d7a4d 100644 (file)
@@ -33,7 +33,7 @@
 #define ENC_IS_KOI8_CTYPE(code,ctype) \
   ((EncKOI8_CtypeTable[code] & ctype) != 0)
 
-static UChar EncKOI8_ToLowerCaseTable[256] = {
+static const UChar EncKOI8_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncKOI8_ToLowerCaseTable[256] = {
   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
 };
 
-static unsigned short EncKOI8_CtypeTable[256] = {
+static const unsigned short EncKOI8_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
   0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -105,9 +105,9 @@ static unsigned short EncKOI8_CtypeTable[256] = {
 
 static int
 koi8_mbc_to_normalize(OnigAmbigType flag,
-                            const UChar** pp, const UChar* end, UChar* lower)
+                            const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower)
 {
-  UChar* p = (UChar *)*pp;
+  const OnigUChar* p = *pp;
 
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
        ONIGENC_IS_MBC_ASCII(p)) ||
@@ -123,9 +123,9 @@ koi8_mbc_to_normalize(OnigAmbigType flag,
 }
 
 static int
-koi8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
 {
-  UChar* p = (UChar *)*pp;
+  const OnigUChar* p = *pp;
 
   (*pp)++;
   if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
@@ -151,9 +151,9 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
-                              OnigPairAmbigCodes** ccs)
+                              const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xc0, 0xe0 },
     { 0xc1, 0xe1 },
     { 0xc2, 0xe2 },
index 7c626df61615c2855cc73a221206c69ae8d51a56..1010f5ff938e69430c8ce0350e0528ffaa1885ec 100644 (file)
@@ -2,7 +2,7 @@
   koi8_r.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
 #define ENC_IS_KOI8_R_CTYPE(code,ctype) \
   ((EncKOI8_R_CtypeTable[code] & ctype) != 0)
 
-static UChar EncKOI8_R_ToLowerCaseTable[256] = {
+static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = {
   '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
 };
 
-static unsigned short EncKOI8_R_CtypeTable[256] = {
+static const unsigned short EncKOI8_R_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
-  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
-  0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+  0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
-  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+  0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
   0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
   0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
-  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
-  0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+  0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+  0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
   0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
-  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+  0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
   0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
@@ -150,9 +150,12 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 static int
 koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
-                              OnigPairAmbigCodes** ccs)
+                                const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
+    { 0xa3, 0xb3 },
+    { 0xb3, 0xa3 },
+
     { 0xc0, 0xe0 },
     { 0xc1, 0xe1 },
     { 0xc2, 0xe2 },
index 6b9ef4c5b5b1de86434e5ef2e71695088ed67c40..fcf057423cfc510ddbfa603d4c3b733ea0b00e49 100644 (file)
@@ -2,7 +2,7 @@
   mktable.c
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
  * SUCH DAMAGE.
  */
 
+#include <stdlib.h>
 #include <stdio.h>
 
 #define NOT_RUBY
@@ -614,15 +615,10 @@ static int IsPunct(int enc, int c)
     if (c >= 0x3c && c <= 0x3e) return 1;
   }
 
-  if (c >= 0x21 && c <= 0x23) return 1;
-  if (c >= 0x25 && c <= 0x2a) return 1;
-  if (c >= 0x2c && c <= 0x2f) return 1;
-  if (c >= 0x3a && c <= 0x3b) return 1;
-  if (c >= 0x3f && c <= 0x40) return 1;
-  if (c >= 0x5b && c <= 0x5d) return 1;
-  if (c == 0x5f) return 1;
-  if (c == 0x7b) return 1;
-  if (c == 0x7d) return 1;
+  if (c >= 0x21 && c <= 0x2f) return 1;
+  if (c >= 0x3a && c <= 0x40) return 1;
+  if (c >= 0x5b && c <= 0x60) return 1;
+  if (c >= 0x7b && c <= 0x7e) return 1;
 
   switch (enc) {
   case ISO_8859_1:
index e13407bccfd9102edca5add3d38a06ef76364f6d..f7d7d5226554015d82ba077cf3f1d9a3f6c3a40b 100644 (file)
@@ -29,7 +29,7 @@
 
 #include "regenc.h"
 
-static int EncLen_SJIS[] = {
+static const int EncLen_SJIS[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -76,7 +76,7 @@ sjis_mbc_enc_len(const UChar* p)
   return EncLen_SJIS[*p];
 }
 
-extern int
+static int
 sjis_code_to_mbclen(OnigCodePoint code)
 {
   if (code < 256) {
@@ -167,21 +167,16 @@ sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 static int
 sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else {
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
     }
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 static UChar*
index e3be9450a5118f1aff976ce90129180fd8c56c1b..a8cf539014ca0caf5f2d2b2a16a7db8373280420 100644 (file)
@@ -30,7 +30,7 @@
 #include "regenc.h"
 
 
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -65,7 +65,7 @@ unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
   0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
 };
 
-static OnigCodePoint CRAlnum[] = {
+static const OnigCodePoint CRAlnum[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   414,
 #else
@@ -490,7 +490,7 @@ static OnigCodePoint CRAlnum[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRAlnum */
 
-static OnigCodePoint CRAlpha[] = {
+static const OnigCodePoint CRAlpha[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   396,
 #else
@@ -897,7 +897,7 @@ static OnigCodePoint CRAlpha[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRAlpha */
 
-static OnigCodePoint CRBlank[] = {
+static const OnigCodePoint CRBlank[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   9,
 #else
@@ -917,7 +917,7 @@ static OnigCodePoint CRBlank[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRBlank */
 
-static OnigCodePoint CRCntrl[] = {
+static const OnigCodePoint CRCntrl[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   19,
 #else
@@ -947,7 +947,7 @@ static OnigCodePoint CRCntrl[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRCntrl */
 
-static OnigCodePoint CRDigit[] = {
+static const OnigCodePoint CRDigit[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   23,
 #else
@@ -981,7 +981,7 @@ static OnigCodePoint CRDigit[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRDigit */
 
-static OnigCodePoint CRGraph[] = {
+static const OnigCodePoint CRGraph[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   405,
 #else
@@ -1397,7 +1397,7 @@ static OnigCodePoint CRGraph[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRGraph */
 
-static OnigCodePoint CRLower[] = {
+static const OnigCodePoint CRLower[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   424,
 #else
@@ -1832,7 +1832,7 @@ static OnigCodePoint CRLower[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRLower */
 
-static OnigCodePoint CRPrint[] = {
+static const OnigCodePoint CRPrint[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   405,
 #else
@@ -2248,7 +2248,7 @@ static OnigCodePoint CRPrint[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRPrint */
 
-static OnigCodePoint CRPunct[] = {
+static const OnigCodePoint CRPunct[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   86,
 #else
@@ -2345,7 +2345,7 @@ static OnigCodePoint CRPunct[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRPunct */
 
-static OnigCodePoint CRSpace[] = {
+static const OnigCodePoint CRSpace[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   11,
 #else
@@ -2367,7 +2367,7 @@ static OnigCodePoint CRSpace[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRSpace */
 
-static OnigCodePoint CRUpper[] = {
+static const OnigCodePoint CRUpper[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   421,
 #else
@@ -2799,7 +2799,7 @@ static OnigCodePoint CRUpper[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of CRUpper */
 
-static OnigCodePoint CRXDigit[] = {
+static const OnigCodePoint CRXDigit[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   3,
 #else
@@ -2810,7 +2810,7 @@ static OnigCodePoint CRXDigit[] = {
   0x0061, 0x0066
 };
 
-static OnigCodePoint CRASCII[] = {
+static const OnigCodePoint CRASCII[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   1,
 #else
@@ -2819,7 +2819,7 @@ static OnigCodePoint CRASCII[] = {
   0x0000, 0x007f
 };
 
-static OnigCodePoint CRWord[] = {
+static const OnigCodePoint CRWord[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   436,
 #else
@@ -3320,6 +3320,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
   case ONIGENC_CTYPE_ALNUM:
     return onig_is_in_code_range((UChar* )CRAlnum, code);
     break;
+  case ONIGENC_CTYPE_NEWLINE:
+    return FALSE;
+    break;
 
   default:
     return ONIGENCERR_TYPE_BUG;
@@ -3337,9 +3340,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 
 extern int
 onigenc_unicode_get_ctype_code_range(int ctype,
-                 OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+                 const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
 {
-  static OnigCodePoint EmptyRange[] = { 0 };
+  static const OnigCodePoint EmptyRange[] = { 0 };
 
 #define CR_SET(list) do { \
   *mbr = list; \
index ad33ddbeeb3b30c9bfc0fd1b34a7fcea7583f07a..6ab80a6c1cea82ea5ed2bcc1135c45c5b4e9310e 100755 (executable)
@@ -2,7 +2,7 @@
   utf16_be.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
 #define UTF16_IS_SURROGATE_FIRST(c)    (c >= 0xd8 && c <= 0xdb)
 #define UTF16_IS_SURROGATE_SECOND(c)   (c >= 0xdc && c <= 0xdf)
 
-static int EncLen_UTF16[] = {
+static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -63,6 +63,12 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
   if (p + 1 < end) {
     if (*(p+1) == 0x0a && *p == 0x00)
       return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00)
+      return 1;
+    if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
+      return 1;
+#endif
   }
   return 0;
 }
@@ -120,18 +126,6 @@ utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
 
   if (*p == 0) {
     p++;
-    if (end > p + 2 &&
-       (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
-       ((*p == 's' && *(p+2) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+2) == 'S'))) &&
-        *(p+1) == 0) {
-      *lower++ = '\0';
-      *lower   = 0xdf;
-      (*pp) += 4;
-      return 2;
-    }
-
     *lower++ = '\0';
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
@@ -171,20 +165,6 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
     int c, v;
 
     p++;
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-      if (end > p + 2 &&
-         ((*p == 's' && *(p+2) == 's') ||
-          ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-           (*p == 'S' && *(p+2) == 'S'))) &&
-          *(p+1) == 0) {
-        (*pp) += 2;
-        return TRUE;
-      }
-      else if (*p == 0xdf) {
-        return TRUE;
-      }
-    }
-
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -228,8 +208,7 @@ OnigEncodingType OnigEncodingUTF16_BE = {
   4,            /* max byte length */
   2,            /* min byte length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index db892dcd14d92682f27cb00ea7d7fea99b61bd83..2248e4910fd9ef3c3824fb3223c0e5de61402d6f 100755 (executable)
@@ -2,7 +2,7 @@
   utf16_le.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
 #define UTF16_IS_SURROGATE_FIRST(c)    (c >= 0xd8 && c <= 0xdb)
 #define UTF16_IS_SURROGATE_SECOND(c)   (c >= 0xdc && c <= 0xdf)
 
-static int EncLen_UTF16[] = {
+static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -69,6 +69,12 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
   if (p + 1 < end) {
     if (*p == 0x0a && *(p+1) == 0x00)
       return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00)
+      return 1;
+    if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
+      return 1;
+#endif
   }
   return 0;
 }
@@ -122,18 +128,6 @@ utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
   const UChar* p = *pp;
 
   if (*(p+1) == 0) {
-    if (end > p + 3 &&
-       (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
-       ((*p == 's' && *(p+2) == 's') ||
-        ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-         (*p == 'S' && *(p+2) == 'S'))) &&
-        *(p+3) == 0) {
-      *lower++ = 0xdf;
-      *lower   = '\0';
-      (*pp) += 4;
-      return 2;
-    }
-
     *(lower+1) = '\0';
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
@@ -170,17 +164,6 @@ utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
   if (*(p+1) == 0) {
     int c, v;
 
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-      if (end > p + 3 &&
-         ((*p == 's' && *(p+2) == 's') ||
-          ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-           (*p == 'S' && *(p+2) == 'S'))) &&
-          *(p+3) == 0) {
-        (*pp) += 2;
-        return TRUE;
-      }
-    }
-
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -223,8 +206,7 @@ OnigEncodingType OnigEncodingUTF16_LE = {
   4,            /* max byte length */
   2,            /* min byte length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index 60feb040b821dbfe669e16fc185bdf1c7aa65fcd..75133ca2626e1f518b0d59103d04ab071a52834d 100755 (executable)
@@ -2,7 +2,7 @@
   utf32_be.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,14 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
   if (p + 3 < end) {
     if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
       return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if ((*(p+3) == 0x0d || *(p+3) == 0x85)
+       && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
+      return 1;
+    if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
+       && *(p+1) == 0 && *p == 0)
+      return 1;
+#endif
   }
   return 0;
 }
@@ -77,20 +85,6 @@ utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
 
   if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
     p += 3;
-    if (end > p + 4 &&
-        (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
-       ((*p == 's' && *(p+4) == 's') ||
-       ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-        (*p == 'S' && *(p+4) == 'S'))) &&
-        *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
-      *lower++ = '\0';
-      *lower++ = '\0';
-      *lower++ = '\0';
-      *lower   = 0xdf;
-      (*pp) += 8;
-      return 4;
-    }
-
     *lower++ = '\0';
     *lower++ = '\0';
     *lower++ = '\0';
@@ -131,20 +125,6 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
     int c, v;
 
     p += 3;
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-      if (end > p + 4 &&
-         ((*p == 's' && *(p+4) == 's') ||
-          ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-           (*p == 'S' && *(p+4) == 'S'))) &&
-          *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
-        (*pp) += 4;
-        return TRUE;
-      }
-      else if (*p == 0xdf) {
-        return TRUE;
-      }
-    }
-
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -183,8 +163,7 @@ OnigEncodingType OnigEncodingUTF32_BE = {
   4,            /* max byte length */
   4,            /* min byte length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index bba9689f7617445342f7b94ef9034f5b32bf4c35..21dca10c115d21a43e7cbf02771bc7342ee2b681 100755 (executable)
@@ -2,7 +2,7 @@
   utf32_le.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,14 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
   if (p + 3 < end) {
     if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
       return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00
+       && *(p+2) == 0x00 && *(p+3) == 0x00)  /* U+000D, U+0085 (little-endian: low byte first) */
+      return 1;
+    if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)
+       && *(p+2) == 0x00 && *(p+3) == 0x00)  /* U+2028, U+2029 */
+      return 1;
+#endif
   }
   return 0;
 }
@@ -76,20 +84,6 @@ utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
   const UChar* p = *pp;
 
   if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
-    if (end > p + 7 &&
-       (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
-       ((*p == 's' && *(p+4) == 's') ||
-        ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-         (*p == 'S' && *(p+4) == 'S'))) &&
-        *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
-      *lower++ = 0xdf;
-      *lower++ = '\0';
-      *lower++ = '\0';
-      *lower   = '\0';
-      (*pp) += 8;
-      return 4;
-    }
-
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -129,20 +123,6 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
   if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
     int c, v;
 
-    if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-      if (end > p + 7 &&
-         ((*p == 's' && *(p+4) == 's') ||
-          ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-           (*p == 'S' && *(p+4) == 'S'))) &&
-          *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
-        (*pp) += 4;
-        return TRUE;
-      }
-      else if (*p == 0xdf) {
-        return TRUE;
-      }
-    }
-
     if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
         ONIGENC_IS_MBC_ASCII(p)) ||
        ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -181,8 +161,7 @@ OnigEncodingType OnigEncodingUTF32_LE = {
   4,            /* max byte length */
   4,            /* min byte length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
index 592bebfe8f26c5eacc76d5df65aca06278be5dfc..c7481d7050ae9d659bfa8be3331ccffa63bdae60 100644 (file)
@@ -2,7 +2,7 @@
   utf8.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
 
 #define utf8_islead(c)     ((UChar )((c) & 0xc0) != 0x80)
 
-static int EncLen_UTF8[] = {
+static const int EncLen_UTF8[] = {
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p)
   return EncLen_UTF8[*p];
 }
 
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end)
+{  /* returns 1 if the bytes at p (bounded by end) begin a line terminator, else 0 */
+  if (p < end) {
+    if (*p == 0x0a) return 1;  /* LF: recognised regardless of the #ifdef below */
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+    if (*p == 0x0d) return 1;  /* CR */
+    if (p + 1 < end) {  /* two-byte sequence must fit before end */
+      if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
+       return 1;
+      if (p + 2 < end) {  /* three-byte sequence must fit before end */
+       if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
+           && *(p+1) == 0x80 && *p == 0xe2)  /* U+2028, U+2029 */
+         return 1;
+      }
+    }
+#endif
+  }
+
+  return 0;
+}
+
 static OnigCodePoint
 utf8_mbc_to_code(const UChar* p, const UChar* end)
 {
@@ -200,17 +223,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
-    if (end > p + 1 &&
-        (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
-       ((*p == 's' && *(p+1) == 's') ||
-        ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-         (*p == 'S' && *(p+1) == 'S')))) {
-      *lower++ = '\303';
-      *lower   = '\237';
-      (*pp) += 2;
-      return 2;
-    }
-
     if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
       *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
     }
@@ -235,15 +247,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC
             return 2;
           }
         }
-#if 0
-        else if (c == (UChar )'\237' &&
-                 (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-          *lower++ = '\303';
-          *lower   = '\237';
-          (*pp) += 2;
-          return 2;
-        }
-#endif
       }
     }
 
@@ -265,15 +268,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
   const UChar* p = *pp;
 
   if (ONIGENC_IS_MBC_ASCII(p)) {
-    if (end > p + 1 &&
-        (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
-       ((*p == 's' && *(p+1) == 's') ||
-        ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
-         (*p == 'S' && *(p+1) == 'S')))) {
-      (*pp) += 2;
-      return TRUE;
-    }
-
     (*pp)++;
     if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
       return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
@@ -295,10 +289,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
             return TRUE;
           }
         }
-        else if (c == (UChar )'\237' &&
-                 (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-         return TRUE;
-        }
       }
     }
   }
@@ -307,16 +297,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
 }
 
 
-static OnigCodePoint EmptyRange[] = { 0 };
+static const OnigCodePoint EmptyRange[] = { 0 };
 
-static OnigCodePoint SBAlnum[] = {
+static const OnigCodePoint SBAlnum[] = {
   3,
   0x0030, 0x0039,
   0x0041, 0x005a,
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBAlnum[] = {
+static const OnigCodePoint MBAlnum[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   411,
 #else
@@ -738,13 +728,13 @@ static OnigCodePoint MBAlnum[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBAlnum */
 
-static OnigCodePoint SBAlpha[] = {
+static const OnigCodePoint SBAlpha[] = {
   2,
   0x0041, 0x005a,
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBAlpha[] = {
+static const OnigCodePoint MBAlpha[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   394,
 #else
@@ -1149,13 +1139,13 @@ static OnigCodePoint MBAlpha[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBAlpha */
 
-static OnigCodePoint SBBlank[] = {
+static const OnigCodePoint SBBlank[] = {
   2,
   0x0009, 0x0009,
   0x0020, 0x0020
 };
 
-static OnigCodePoint MBBlank[] = {
+static const OnigCodePoint MBBlank[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   7,
 #else
@@ -1173,13 +1163,13 @@ static OnigCodePoint MBBlank[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBBlank */
 
-static OnigCodePoint SBCntrl[] = {
+static const OnigCodePoint SBCntrl[] = {
   2,
   0x0000, 0x001f,
   0x007f, 0x007f
 };
 
-static OnigCodePoint MBCntrl[] = {
+static const OnigCodePoint MBCntrl[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   18,
 #else
@@ -1208,12 +1198,12 @@ static OnigCodePoint MBCntrl[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBCntrl */
 
-static OnigCodePoint SBDigit[] = {
+static const OnigCodePoint SBDigit[] = {
   1,
   0x0030, 0x0039
 };
 
-static OnigCodePoint MBDigit[] = {
+static const OnigCodePoint MBDigit[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   22,
 #else
@@ -1245,12 +1235,12 @@ static OnigCodePoint MBDigit[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBDigit */
 
-static OnigCodePoint SBGraph[] = {
+static const OnigCodePoint SBGraph[] = {
   1,
   0x0021, 0x007e
 };
 
-static OnigCodePoint MBGraph[] = {
+static const OnigCodePoint MBGraph[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   404,
 #else
@@ -1665,12 +1655,12 @@ static OnigCodePoint MBGraph[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBGraph */
 
-static OnigCodePoint SBLower[] = {
+static const OnigCodePoint SBLower[] = {
   1,
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBLower[] = {
+static const OnigCodePoint MBLower[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   423,
 #else
@@ -2104,13 +2094,13 @@ static OnigCodePoint MBLower[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBLower */
 
-static OnigCodePoint SBPrint[] = {
+static const OnigCodePoint SBPrint[] = {
   2,
   0x0009, 0x000d,
   0x0020, 0x007e
 };
 
-static OnigCodePoint MBPrint[] = {
+static const OnigCodePoint MBPrint[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   403,
 #else
@@ -2524,7 +2514,7 @@ static OnigCodePoint MBPrint[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBPrint */
 
-static OnigCodePoint SBPunct[] = {
+static const OnigCodePoint SBPunct[] = {
   9,
   0x0021, 0x0023,
   0x0025, 0x002a,
@@ -2537,7 +2527,7 @@ static OnigCodePoint SBPunct[] = {
   0x007d, 0x007d
 }; /* end of SBPunct */
 
-static OnigCodePoint MBPunct[] = {
+static const OnigCodePoint MBPunct[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   77,
 #else
@@ -2625,13 +2615,13 @@ static OnigCodePoint MBPunct[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBPunct */
 
-static OnigCodePoint SBSpace[] = {
+static const OnigCodePoint SBSpace[] = {
   2,
   0x0009, 0x000d,
   0x0020, 0x0020
 };
 
-static OnigCodePoint MBSpace[] = {
+static const OnigCodePoint MBSpace[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   9,
 #else
@@ -2651,12 +2641,12 @@ static OnigCodePoint MBSpace[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBSpace */
 
-static OnigCodePoint SBUpper[] = {
+static const OnigCodePoint SBUpper[] = {
   1,
   0x0041, 0x005a
 };
 
-static OnigCodePoint MBUpper[] = {
+static const OnigCodePoint MBUpper[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   420,
 #else
@@ -3087,19 +3077,19 @@ static OnigCodePoint MBUpper[] = {
 #endif /* USE_UNICODE_FULL_RANGE_CTYPE */
 }; /* end of MBUpper */
 
-static OnigCodePoint SBXDigit[] = {
+static const OnigCodePoint SBXDigit[] = {
   3,
   0x0030, 0x0039,
   0x0041, 0x0046,
   0x0061, 0x0066
 };
 
-static OnigCodePoint SBASCII[] = {
+static const OnigCodePoint SBASCII[] = {
   1,
   0x0000, 0x007f
 };
 
-static OnigCodePoint SBWord[] = {
+static const OnigCodePoint SBWord[] = {
   4,
   0x0030, 0x0039,
   0x0041, 0x005a,
@@ -3107,7 +3097,7 @@ static OnigCodePoint SBWord[] = {
   0x0061, 0x007a
 };
 
-static OnigCodePoint MBWord[] = {
+static const OnigCodePoint MBWord[] = {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
   432,
 #else
@@ -3554,7 +3544,7 @@ static OnigCodePoint MBWord[] = {
 
 static int
 utf8_get_ctype_code_range(int ctype,
-                          OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+                          const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
 {
 #define CR_SET(sbl,mbl) do { \
   *sbr = sbl; \
@@ -3622,7 +3612,7 @@ static int
 utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
 #ifdef USE_UNICODE_FULL_RANGE_CTYPE
-  OnigCodePoint *range;
+  const OnigCodePoint *range;
 #endif
 
   if (code < 256) {
@@ -3674,6 +3664,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
   case ONIGENC_CTYPE_ALNUM:
     range = MBAlnum;
     break;
+  case ONIGENC_CTYPE_NEWLINE:
+    return FALSE;
+    break;
 
   default:
     return ONIGENCERR_TYPE_BUG;
@@ -3713,8 +3706,7 @@ OnigEncodingType OnigEncodingUTF8 = {
   6,           /* max byte length */
   1,           /* min byte length */
   (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | 
-   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | 
-   ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+   ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
   {
       (OnigCodePoint )'\\'                       /* esc */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.'  */
@@ -3723,7 +3715,7 @@ OnigEncodingType OnigEncodingUTF8 = {
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
     , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
   },
-  onigenc_is_mbc_newline_0x0a,
+  utf8_is_mbc_newline,
   utf8_mbc_to_code,
   utf8_code_to_mbclen,
   utf8_code_to_mbc,
index 02e844c363d4955ed9f6932bc09a20de9c2cd637..d55f1cc94f7242a50f04cd95bcf69a80e4008a3d 100755 (executable)
@@ -5,58 +5,50 @@
 </head>
 <body BGCOLOR="#ffffff" VLINK="#808040" TEXT="#696969">
 
-<!--
-<a href="http://miuras.net/matsushita.html">
-<img src="anti_matsushita.PNG" height="46" width="266">
-</a>
--->
-<a href="http://miuras.net/matsushita.html">M</a>
-<a href="http://www.micropac.co.jp/nec/">N</a>
-
-<h1>Oniguruma</h1>
+<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
 
 <p>
-2005/03/07 (C) K.Kosako
+(c) K.Kosako, updated at: 2007/08/16
 </p>
 
+<dl>
+<font color="orange">
+<dt><b>What's new</b>
+</font>
+<ul>
+<li>2007/08/16: Version 4.7.1 released.</li>
+<li>2007/07/14: Version 5.9.0 released.</li>
+<li>2007/06/20: Version 2.5.9 released.</li>
+<li>2007/06/20: Maintainer of 2.x was changed.</li>
+</ul>
+</dl>
+<hr>
+
 <p>
 Oniguruma is a regular expressions library.<br>
 The characteristics of this library is that different character encoding
 <br>for every regular expression object can be specified.
+<br>(supported APIs: GNU regex, POSIX and Oniguruma native)
 </p>
 
 <dl>
 <dt><b>Supported character encodings:</b><br>
 ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,<br>
 EUC-JP, EUC-TW, EUC-KR, EUC-CN,<br>
-Shift_JIS, Big5, KOI8-R, KOI8,<br>
+Shift_JIS, Big5, GB18030, KOI8-R, CP1251,<br>
 ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,<br>
 ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,<br>
-ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-</p>
-</dl>
-<p>
-
-<dl>
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16<br>
 <font color="orange">
-<dt><b>What's new</b>
+(GB18030 encoding was contributed by KUBO Takehiro)<br>
+(CP1251 encoding was contributed by Byte)
 </font>
-<ul>
-<li>released Version 3.7.1 (2005/03/07)
-<li>released Version 2.4.2 (2005/03/05)
-</ul>
+</p>
 </dl>
 
 <hr>
 
-<dl>
-<dt>There are two ways of using of it in this program.
-<ul>
-<li> (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
-<li> (2) Built-in regular expressions engine of <a href="http://www.ruby-lang.org/">Ruby</a> 1.6/1.8/1.9 <br>
-     In Ruby 1.9, Oniguruma is already integrated by Kazuo Saito.
-</ul>
-</dl>
+<dl><dt><b>License:</b> BSD license.</dl>
 
 <dl>
 <dt><b>Platform:</b>
@@ -67,31 +59,27 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
 </ul>
 
 <br>
-<dt><b>License:</b><br>
-When this software is partly used or it is distributed with Ruby,
-this of Ruby follows the license of Ruby.<br>
-It follows the BSD license in the case of the one except for it.
-</p>
 
 <dt><b>Download:</b>
 <ul>
-<li> <a href="archive/onigd20050307.tar.gz">Latest release version 3.7.1</a> (2005/03/07)   <a href="HISTORY_3X.txt">Change Log</a>
-<li> <a href="archive/onigd20050219.tar.gz">3.7.0</a> (2005/02/19)
-<li> <a href="archive/onigd20050204.tar.gz">3.6.0</a> (2005/02/04)
-<li> <a href="archive/onigd2_4_2.tar.gz">Latest release version 2.4.2</a> (2005/03/05)   <a href="HISTORY_2X.txt">Change Log</a>
-<li> <a href="archive/onigd2_4_1.tar.gz">2.4.1</a> (2005/01/05)
-<li> <a href="archive/onigd2_4_0.tar.gz">2.4.0</a> (2004/12/01)
+<li> <a href="archive/onig-5.9.0.tar.gz">Latest release version 5.9.0</a> (2007/07/14)   <a href="HISTORY_5X.txt">Change Log</a>
+<li> <a href="archive/onig-5.8.0.tar.gz">5.8.0</a> (2007/06/04)
+<li> <a href="archive/onig-5.7.0.tar.gz">5.7.0</a> (2007/04/27)
+<li> <a href="archive/onig-4.7.1.tar.gz">Latest release version 4.7.1</a> (2007/08/16)   <a href="HISTORY_4X.txt">Change Log</a>
+<li> <a href="archive/onig-4.7.0.tar.gz">4.7.0</a> (2007/06/18)
+<li> <a href="archive/onigd2_5_9.tar.gz">Latest release version 2.5.9</a> (2007/06/20)   <a href="HISTORY_2X.txt">Change Log</a>
 </ul>
 
 <br>
 <font color="red">
-* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.<br>
-* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
+The maintainer of 2.x has changed to Hannes Wyss &lt;hwyss AT ywesee.com&gt;.<br>
+For questions about 2.x, please contact him.<br>
 </font>
+* 5.x supports Unicode Property/Script.<br>
+* 2.x supports Ruby1.6/1.8.<br>
 
 <br>
-<br>
-<dt><b>Documents:</b> (version 3.7.1)
+<dt><b>Documents:</b> (version 5.9.0)
 <ul>
  <li> <a href="doc/RE.txt">Regular Expressions</a>
       <a href="doc/RE.ja.txt">(Japanese: EUC-JP)</a>
@@ -107,72 +95,93 @@ It follows the BSD license in the case of the one except for it.
 </ul>
 
 <br>
-<dt><b>Links:</b>
+<dt><b>Site Links:</b>
 <ul>
-<li> <a href="http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/">Oniguruma in Ruby CVS</a> (old version)
-<li> <a href="http://raa.ruby-lang.org/project/oniguruma/">Oniguruma in RAA</a> (Ruby Application Archive)
 <li> <a href="http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/">FreeBSD ports</a>
-<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin</a> (Japanese page)
+<li> <a href="http://www.softantenna.com/lib/1953/index.html">SoftAntenna &gt; Lib &gt;  Oniguruma</a> (Japanese page)
+</ul>
+
+<br>
+<dt><b>Links:</b>
+<ul>
+<li> <a href="http://homepage3.nifty.com/k-takata/mysoft/bregonig.html">bregonig.dll (Win32)</a> (Japanese page)
+<li> <a href="http://www.halbiz.com/osaru/cnregex.html">cnRegex 4D Plugin (Mac OS X)</a> (Japanese page)
+<li> <a href="http://kmaebashi.com/">crowbar</a> (Japanese page)
+<li> <a href="http://oniguruma5.darwinports.com">Darwin Ports (Mac OS X)</a>
+<li> <a href="http://homepage2.nifty.com/Km/onig.htm">Delphi interface (Win32)</a> (Japanese page)
+<li> <a href="http://pyxis-project.net/ensemble/">Ensemble (Mac OS X)</a> (Japanese page)
+<li> <a href="http://www.srcw.net/FaEdit/">FaEdit (Win32)</a> (Japanese page)
 <li> <a href="http://www.tom.sfc.keio.ac.jp/~sakai/d/?date=20050209">GHC patch</a> Masahiro Sakai (Japanese Blog)
-<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail</a>
-<li> <a href="http://www.artman21.net/">Jedit X</a>
-<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz</a> (Japanese page)
+<li> <a href="http://www.gyazsquare.com/gyazmail/index.php">GyazMail (Mac OS X)</a>
+<li> <a href="http://www5d.biglobe.ne.jp/~f-taste/knt3/jcref3.html">J-cref v3</a> (Japanese page)
+<li> <a href="http://www.artman21.net/">Jedit X (Mac OS X)</a>
+<li> <a href="http://www.chitora.jp/lhaz.html">Lhaz (Win32)</a> (Japanese page)
+<li> <a href="http://limechat.net/">LimeChat</a> (Japanese page)
+<li> <a href="http://medb.enhiro.com/">meDB</a> (Japanese page)
+<li> <a href="http://monaos.org/">Mona OS</a>
+<li> <a href="http://mongoose.jp/">mongoose</a> (Japanese page)
 <li> <a href="http://www.irori.org/tool/mregexp.html">mregexp</a> (Japanese page)
-<li> <a href="http://www.trinity-site.net/wiki/index.php?MultiFind">MultiFind</a> (Japanese page)
 <li> <a href="http://ochusha.sourceforge.jp/">Ochusha</a> (Japanese page)
-<li> <a href="http://www-gauge.scphys.kyoto-u.ac.jp/~sonobe/OgreKit/index.html">OgreKit</a> Regular Expression Framework for Cocoa (Japanese page)
-<li> <a href ="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
-<li> <a href ="http://www.moriq.com/onig/">Oniguruma / FireBird (Win32)</a>
-<li> <a href ="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&l=jp">Oniguruma-mysqld</a>
-<li> <a href ="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
+<li> <a href="http://www8.ocn.ne.jp/%7esonoisa/OgreKit/index.html">OgreKit (Mac OS X)</a> Regular Expression Framework for Cocoa (Japanese page)
+<li> <a href="http://www.kanetaka.net/4dapi/wiki4d.dll/4dcgi/wiki.cgi?plugins-oniguruma">OnigRegexp</a> (Japanese page)
+<li> <a href="http://rubyforge.org/projects/oniguruma">Oniguruma for Ruby</a>
+<li> <a href="http://openspace.timedia.co.jp/~yasuyuki/wiliki/wiliki.cgi?Oniguruma-mysqld&amp;l=jp">Oniguruma-mysqld</a>
+<li> <a href="http://www.void.in/wiki/OnigPP">OnigPP</a> (Japanese page)
+<li> <a href="http://www.kt.rim.or.jp/~kbk/sed/index.html">Onigsed (Win32)</a> (Japanese page)
+<li> <a href="http://glozer.net/code.html#oregexp">oregexp</a> Erlang binding
+<li> <a href="http://www.kt.rim.or.jp/~kbk/yagrep/index.html">yagrep (Win32)</a> (Japanese page)
 <li> <a href="http://www.php.gr.jp/">Japan PHP User Group</a> PHP 5.0 mb_ereg (Japanese page)
-<li> <a href="http://www.ruby-lang.org/">Ruby</a>
-<li> <a href="http://quux.s74.xrea.com/">SevenFour</a> (Japanese page)
-<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod</a>
+<li> <a href="http://yatsu.info/wiki/Pufui/">Pufui (Mac OS X)</a> (Japanese page)
+<li> <a href="http://ultrapop.jp/?q2ch">q2ch</a> (Japanese page)
+<li> <a href="http://harumune.s56.xrea.com/assari/index.php?RSSTyping">RSSTyping</a> (Japanese page)
+<li> <a href="http://tobysoft.net/wiki/index.php?Ruby%2Fruby-win32-oniguruma">ruby-win32-oniguruma</a> (Japanese page)
+<li> <a href="http://quux.s74.xrea.com/">SevenFour (Mac OS X)</a> (Japanese page)
+<li> <a href="http://storklab.cyber-ninja.jp/">Stork Lab. Products (Mac OS X)</a> (Japanese page)
+<li> <a href="http://sourceforge.jp/projects/ttssh2/">TeraTerm (Win32)</a>
+<li> <a href="http://www8.ocn.ne.jp/~sonoisa/TiddlyWikiPod/">TiddlyWikiPod (Mac OS X)</a>
+<li> <a href="http://www.cyanworks.net/mac.html">TunesTEXT (Mac OS X)</a>
+<li> <a href="http://sourceforge.jp/projects/frogger/">XML parser</a>
+<li> <a href="http://www.yokkasoft.net/">YokkaSoft (Win32)</a> (Japanese page)
 </ul>
 
 <br>
 <dt><b>References:</b>
 <ul>
 <li> <a href="http://www.ruby-lang.org/ja/man/index.cgi?cmd=view;name=%C0%B5%B5%AC%C9%BD%B8%BD">Ruby Reference Manual Regexp</a> (Japanese page)
-<li> <a href="http://www.perldoc.com/perl5.8.0/pod/perlre.html">Perl regular expressions</a>
+<li> <a href="http://www.perl.com/doc/manual/html/pod/perlre.html">Perl regular expressions</a>
 <li> <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern (J2SE 1.4.2)</a>
 <li> <a href="http://www.opengroup.org/onlinepubs/007908799/xbd/re.html">The Open Group</a>
-<li> <a href="http://www.pcre.org/">PCRE</a>
-<!--
-<li> <a href="http://www.jajakarta.org/regexp/">Jakarta Project Regexp</a> (Japanese page)
-<li> <a href="http://www.jajakarta.org/oro/">Jakarta Project ORO</a> (Japanese page)
--->
+<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
+<li> <a href="http://www.unicode.org/">Unicode Home Page</a>
 <li> <a href="http://www.kt.rim.or.jp/~kbk/regex/regex.html">Regular expressions memo</a> (Japanese page)
 <li> <a href="http://www.din.or.jp/~ohzaki/regex.htm">Regular expressions technique</a> (Japanese page)
-<li> <a href="http://regex.info/">Mastering Regular Expressions</a>
 </ul>
 
 <br>
-<!--
-<dt><b>ToDo:</b>
-<ul>
-<li> support character types for all code point range.
-</ul>
--->
 </dl>
 <p>
 and I'm thankful to Akinori MUSHA.
 </p>
 
-<!--
 <hr>
-<font color="red">
-2004-06-14<br>
-To: "Greg A. Woods"<br>
-I can't send mail to you. (rejected)<br>
-Please set the nmatch argument of regexec() to 1,
-and use Oniguruma 3.7.1 or 2.4.2.<br>
-The nmatch argument should be array size of a pmatch.<br>
-But I don't know whether this problem is related to the crash
-that you reported.
-</font>
--->
+<dl>
+<dt><b>Other Libraries:</b>
+<ul>
+<li> <a href="http://www.boost.org/libs/regex/doc/">Boost.Regex</a>
+<li> <a href="http://arglist.com/regex/">A copy of Henry Spencer's regex library</a>
+<li> <a href="http://directory.fsf.org/regex.html">GNU regex</a>
+<li> <a href="http://www.pcre.org/">PCRE</a>
+<li> <a href="http://re2c.org/">re2c</a>
+<li> <a href="http://tiny-rex.sourceforge.net/">T-Rex</a>
+<li> <a href="http://laurikari.net/tre/">TRE</a>
+<li> <a href="http://jregex.sourceforge.net/">JRegex (Java)</a>
+<li> <a href="http://www.cacas.org/java/gnu/regexp/">gnu.regexp for Java</a>
+<li> <a href="http://jakarta.apache.org/regexp/index.html">Jakarta Project Regexp</a>
+<li> <a href="http://jakarta.apache.org/oro/">Jakarta Project ORO</a>
+</ul>
+</dl>
+
 <hr>
+<a href="../">Back to Home</a>
 </body>
 </html>
index 4c029304b6ae52b93c9028ddb138b4ad4bfe1d2c..d9b14191467922aeb1466e00fe4423383b47c4d9 100644 (file)
 #define REGCODE_EUCJP         REG_ENCODING_EUC_JP
 #define REGCODE_SJIS          REG_ENCODING_SJIS
 
+/* Don't use REGCODE_XXXX. (obsoleted) */
+#define MBCTYPE_ASCII         RE_MBCTYPE_ASCII
+#define MBCTYPE_EUC           RE_MBCTYPE_EUC
+#define MBCTYPE_SJIS          RE_MBCTYPE_SJIS
+#define MBCTYPE_UTF8          RE_MBCTYPE_UTF8
+
 typedef unsigned char*   RegTransTableType;
 #define RegOptionType    OnigOptionType
 #define RegDistance      OnigDistance
index b203f6c8a3c9455f2b766dc41da10763c6514f5c..3da9f235c2ef0055d98d7a650bb5e74431f3ce02 100644 (file)
 extern "C" {
 #endif
 
-#define MBCTYPE_ASCII         0
-#define MBCTYPE_EUC           1
-#define MBCTYPE_SJIS          2
-#define MBCTYPE_UTF8          3
+#define RE_MBCTYPE_ASCII         0
+#define RE_MBCTYPE_EUC           1
+#define RE_MBCTYPE_SJIS          2
+#define RE_MBCTYPE_UTF8          3
 
 /* GNU regex options */
 #ifndef RE_NREGS
index 0fc4ac5a11851008503b88a0adda11e28429066e..5196a3d585d8aa39fb3860eff2671fc53f72ea2c 100644 (file)
@@ -4,7 +4,7 @@
   oniguruma.h - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,7 @@ extern "C" {
 #endif
 
 #define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR   3
+#define ONIGURUMA_VERSION_MAJOR   4
 #define ONIGURUMA_VERSION_MINOR   7
 #define ONIGURUMA_VERSION_TEENY   1
 
@@ -49,6 +49,13 @@ extern "C" {
 # endif
 #endif
 
+/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
+#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
+# ifndef  HAVE_STDARG_PROTOTYPES
+#  define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
 #ifndef P_
 #if defined(__STDC__) || defined(_WIN32)
 # define P_(args) args
@@ -99,17 +106,11 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag;
 #define ONIGENC_AMBIGUOUS_MATCH_NONE                   0
 #define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE            (1<<0)
 #define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE         (1<<1)
-/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT             (1<<2) */
-/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA  (1<<3) */
-/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH     (1<<4) */
 
 #define ONIGENC_AMBIGUOUS_MATCH_LIMIT                 (1<<1)
-#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND              (1<<30)
 
 #define ONIGENC_AMBIGUOUS_MATCH_FULL \
-  ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \
-    ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \
-    ONIGENC_AMBIGUOUS_MATCH_COMPOUND )
+ ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE )
 #define ONIGENC_AMBIGUOUS_MATCH_DEFAULT  OnigDefaultAmbigFlag
 
 
@@ -167,10 +168,10 @@ typedef struct {
   int    (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf);
   int    (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to);
   int    (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end);
-  int    (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
-  int    (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
+  int    (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs);
+  int    (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs);
   int    (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
-  int    (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
+  int    (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]);
   OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p);
   int    (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end);
 } OnigEncodingType;
@@ -206,6 +207,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingSJIS;
 ONIG_EXTERN OnigEncodingType OnigEncodingKOI8;
 ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R;
 ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
+ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
 
 #define ONIG_ENCODING_ASCII        (&OnigEncodingASCII)
 #define ONIG_ENCODING_ISO_8859_1   (&OnigEncodingISO_8859_1)
@@ -236,6 +238,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
 #define ONIG_ENCODING_KOI8         (&OnigEncodingKOI8)
 #define ONIG_ENCODING_KOI8_R       (&OnigEncodingKOI8_R)
 #define ONIG_ENCODING_BIG5         (&OnigEncodingBIG5)
+#define ONIG_ENCODING_GB18030      (&OnigEncodingGB18030)
 
 #endif /* else RUBY && M17N */
 
@@ -418,11 +421,11 @@ OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const
 
 /* encoding API */
 ONIG_EXTERN
-int onigenc_init P_(());
+int onigenc_init P_((void));
 ONIG_EXTERN
 int onigenc_set_default_encoding P_((OnigEncoding enc));
 ONIG_EXTERN
-OnigEncoding onigenc_get_default_encoding P_(());
+OnigEncoding onigenc_get_default_encoding P_((void));
 ONIG_EXTERN
 void  onigenc_set_default_caseconv_table P_((const OnigUChar* table));
 ONIG_EXTERN
@@ -448,7 +451,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
 #define ONIG_NREGION                          10
 #define ONIG_MAX_BACKREF_NUM                1000
 #define ONIG_MAX_REPEAT_NUM               100000
-#define ONIG_MAX_MULTI_BYTE_RANGES_NUM      1000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM     10000
 /* constants */
 #define ONIG_MAX_ERROR_MESSAGE_LEN            90
 
@@ -457,8 +460,8 @@ typedef unsigned int        OnigOptionType;
 #define ONIG_OPTION_DEFAULT            ONIG_OPTION_NONE
 
 /* options */
-#define ONIG_OPTION_NONE                 0
-#define ONIG_OPTION_IGNORECASE           1L
+#define ONIG_OPTION_NONE                 0U
+#define ONIG_OPTION_IGNORECASE           1U
 #define ONIG_OPTION_EXTEND               (ONIG_OPTION_IGNORECASE         << 1)
 #define ONIG_OPTION_MULTILINE            (ONIG_OPTION_EXTEND             << 1)
 #define ONIG_OPTION_SINGLELINE           (ONIG_OPTION_MULTILINE          << 1)
@@ -471,6 +474,7 @@ typedef unsigned int        OnigOptionType;
 #define ONIG_OPTION_NOTBOL               (ONIG_OPTION_CAPTURE_GROUP << 1)
 #define ONIG_OPTION_NOTEOL               (ONIG_OPTION_NOTBOL << 1)
 #define ONIG_OPTION_POSIX_REGION         (ONIG_OPTION_NOTEOL << 1)
+#define ONIG_OPTION_MAXBIT               ONIG_OPTION_POSIX_REGION  /* limit */
 
 #define ONIG_OPTION_ON(options,regopt)      ((options) |= (regopt))
 #define ONIG_OPTION_OFF(options,regopt)     ((options) &= ~(regopt))
@@ -484,6 +488,7 @@ typedef struct {
   OnigOptionType options;    /* default option */
 } OnigSyntaxType;
 
+ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs;
@@ -491,9 +496,11 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxJava;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG;
 ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
 
 /* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS               (&OnigSyntaxASIS)
 #define ONIG_SYNTAX_POSIX_BASIC        (&OnigSyntaxPosixBasic)
 #define ONIG_SYNTAX_POSIX_EXTENDED     (&OnigSyntaxPosixExtended)
 #define ONIG_SYNTAX_EMACS              (&OnigSyntaxEmacs)
@@ -501,6 +508,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby;
 #define ONIG_SYNTAX_GNU_REGEX          (&OnigSyntaxGnuRegex)
 #define ONIG_SYNTAX_JAVA               (&OnigSyntaxJava)
 #define ONIG_SYNTAX_PERL               (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_PERL_NG            (&OnigSyntaxPerl_NG)
 #define ONIG_SYNTAX_RUBY               (&OnigSyntaxRuby)
 
 /* default syntax */
@@ -508,80 +516,81 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
 #define ONIG_SYNTAX_DEFAULT   OnigDefaultSyntax
 
 /* syntax (operators) */
-#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS    (1<<0)
-#define ONIG_SYN_OP_DOT_ANYCHAR                 (1<<1)   /* . */
-#define ONIG_SYN_OP_ASTERISK_ZERO_INF           (1<<2)   /* * */
-#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF       (1<<3)
-#define ONIG_SYN_OP_PLUS_ONE_INF                (1<<4)   /* + */
-#define ONIG_SYN_OP_ESC_PLUS_ONE_INF            (1<<5)
-#define ONIG_SYN_OP_QMARK_ZERO_ONE              (1<<6)   /* ? */
-#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE          (1<<7)
-#define ONIG_SYN_OP_BRACE_INTERVAL              (1<<8)   /* {lower,upper} */
-#define ONIG_SYN_OP_ESC_BRACE_INTERVAL          (1<<9)   /* \{lower,upper\} */
-#define ONIG_SYN_OP_VBAR_ALT                    (1<<10)   /* | */
-#define ONIG_SYN_OP_ESC_VBAR_ALT                (1<<11)  /* \| */
-#define ONIG_SYN_OP_LPAREN_SUBEXP               (1<<12)  /* (...)   */
-#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP           (1<<13)  /* \(...\) */
-#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR           (1<<14)  /* \A, \Z, \z */
-#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR  (1<<15)  /* \G     */
-#define ONIG_SYN_OP_DECIMAL_BACKREF             (1<<16)  /* \num   */
-#define ONIG_SYN_OP_BRACKET_CC                  (1<<17)  /* [...]  */
-#define ONIG_SYN_OP_ESC_W_WORD                  (1<<18)  /* \w, \W */
-#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END     (1<<19)  /* \<. \> */
-#define ONIG_SYN_OP_ESC_B_WORD_BOUND            (1<<20)  /* \b, \B */
-#define ONIG_SYN_OP_ESC_S_WHITE_SPACE           (1<<21)  /* \s, \S */
-#define ONIG_SYN_OP_ESC_D_DIGIT                 (1<<22)  /* \d, \D */
-#define ONIG_SYN_OP_LINE_ANCHOR                 (1<<23)  /* ^, $   */
-#define ONIG_SYN_OP_POSIX_BRACKET               (1<<24)  /* [:xxxx:] */
-#define ONIG_SYN_OP_QMARK_NON_GREEDY            (1<<25)  /* ??,*?,+?,{n,m}? */
-#define ONIG_SYN_OP_ESC_CONTROL_CHARS           (1<<26)  /* \n,\r,\t,\a ... */
-#define ONIG_SYN_OP_ESC_C_CONTROL               (1<<27)  /* \cx  */
-#define ONIG_SYN_OP_ESC_OCTAL3                  (1<<28)  /* \OOO */
-#define ONIG_SYN_OP_ESC_X_HEX2                  (1<<29)  /* \xHH */
-#define ONIG_SYN_OP_ESC_X_BRACE_HEX8            (1<<30)  /* \x{7HHHHHHH} */
-
-#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1<<0)   /* \Q...\E */
-#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1<<1)   /* (?...) */
-#define ONIG_SYN_OP2_OPTION_PERL                (1<<2)   /* (?imsx),(?-imsx) */
-#define ONIG_SYN_OP2_OPTION_RUBY                (1<<3)   /* (?imx), (?-imx)  */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT     (1<<4)   /* ?+,*+,++ */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL   (1<<5)   /* {n,m}+   */
-#define ONIG_SYN_OP2_CCLASS_SET_OP              (1<<6)   /* [...&&..[..]..] */
-#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       (1<<7)   /* (?<name>...) */
-#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        (1<<8)   /* \k<name> */
-#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL          (1<<9)   /* \g<name>, \g<n> */
-#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY     (1<<10)  /* (?@..),(?@<x>..) */
-#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL  (1<<11)  /* \C-x */
-#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META     (1<<12)  /* \M-x */
-#define ONIG_SYN_OP2_ESC_V_VTAB                 (1<<13)  /* \v as VTAB */
-#define ONIG_SYN_OP2_ESC_U_HEX4                 (1<<14)  /* \uHHHH */
-#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1<<15)  /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  (1<<16)  /* \p{...}, \P{...} */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17)  /* \p{^..}, \P{^..} */
-#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    (1<<18)  /* \p{IsXDigit} */
-#define ONIG_SYN_OP2_ESC_H_XDIGIT               (1<<19)  /* \h, \H */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS    (1U<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR                 (1U<<1)   /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF           (1U<<2)   /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF       (1U<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF                (1U<<4)   /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF            (1U<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE              (1U<<6)   /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE          (1U<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL              (1U<<8)   /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL          (1U<<9)   /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT                    (1U<<10)   /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT                (1U<<11)  /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP               (1U<<12)  /* (...)   */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP           (1U<<13)  /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR           (1U<<14)  /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR  (1U<<15)  /* \G     */
+#define ONIG_SYN_OP_DECIMAL_BACKREF             (1U<<16)  /* \num   */
+#define ONIG_SYN_OP_BRACKET_CC                  (1U<<17)  /* [...]  */
+#define ONIG_SYN_OP_ESC_W_WORD                  (1U<<18)  /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END     (1U<<19)  /* \<, \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND            (1U<<20)  /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE           (1U<<21)  /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT                 (1U<<22)  /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR                 (1U<<23)  /* ^, $   */
+#define ONIG_SYN_OP_POSIX_BRACKET               (1U<<24)  /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY            (1U<<25)  /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS           (1U<<26)  /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL               (1U<<27)  /* \cx  */
+#define ONIG_SYN_OP_ESC_OCTAL3                  (1U<<28)  /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2                  (1U<<29)  /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8            (1U<<30)  /* \x{7HHHHHHH} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE        (1U<<0)  /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT         (1U<<1)  /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL                (1U<<2)  /* (?imsx),(?-imsx) */
+#define ONIG_SYN_OP2_OPTION_RUBY                (1U<<3)  /* (?imx), (?-imx)  */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT     (1U<<4)  /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL   (1U<<5)  /* {n,m}+   */
+#define ONIG_SYN_OP2_CCLASS_SET_OP              (1U<<6)  /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       (1U<<7)  /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        (1U<<8)  /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL          (1U<<9)  /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY     (1U<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL  (1U<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META     (1U<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB                 (1U<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4                 (1U<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR         (1U<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  (1U<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
+#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    (1U<<18) /* \p{IsXDigit} */
+#define ONIG_SYN_OP2_ESC_H_XDIGIT               (1U<<19) /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE         (1U<<20) /* \ */
 
 /* syntax (behavior) */
-#define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1<<31) /* not implemented */
-#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS        (1<<0)  /* ?, *, +, {n,m} */
-#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS      (1<<1)  /* error or ignore */
-#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP    (1<<2)  /* ...)... */
-#define ONIG_SYN_ALLOW_INVALID_INTERVAL          (1<<3)  /* {??? */
-#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV       (1<<4)  /* {,n} => {0,n} */
-#define ONIG_SYN_STRICT_CHECK_BACKREF            (1<<5)  /* /(\1)/,/\1()/ ..*/
-#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND   (1<<6)  /* (?<=a|bc) */
-#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP        (1<<7)  /* see doc/RE */
-#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8)  /* (?<x>)(?<x>) */
-#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY   (1<<9)  /* a{n}?=(?:a{n})? */
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS           (1U<<31) /* not implemented */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS        (1U<<0)  /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS      (1U<<1)  /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP    (1U<<2)  /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL          (1U<<3)  /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV       (1U<<4)  /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF            (1U<<5)  /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND   (1U<<6)  /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP        (1U<<7)  /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8)  /* (?<x>)(?<x>) */
+#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY   (1U<<9)  /* a{n}?=(?:a{n})? */
 
 /* syntax (behavior) in char class [...] */
-#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC      (1<<20) /* [^...] */
-#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC          (1<<21) /* [..\w..] etc.. */
-#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC         (1<<22)
-#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC     (1<<23) /* [0-9-a]=[0-9\-a] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC      (1U<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC          (1U<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC         (1U<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC     (1U<<23) /* [0-9-a]=[0-9\-a] */
 /* syntax (behavior) warning */
-#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED          (1<<24) /* [,-,] */
-#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT    (1<<25) /* (?:a*)+ */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED          (1U<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT    (1U<<25) /* (?:a*)+ */
 
 /* meta character specifiers (onig_set_meta_char()) */
 #define ONIG_META_CHAR_ESCAPE               0
@@ -660,6 +669,7 @@ ONIG_EXTERN OnigSyntaxType*   OnigDefaultSyntax;
 #define ONIGERR_INVALID_WIDE_CHAR_VALUE                      -400
 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE                      -401
 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION           -402
+#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS               -403
 
 /* errors related to thread */
 #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT                -1001
@@ -701,6 +711,7 @@ struct re_registers {
 typedef struct re_registers   OnigRegion;
 
 typedef struct {
+  OnigEncoding enc;
   OnigUChar* par;
   OnigUChar* par_end;
 } OnigErrorInfo;
@@ -735,6 +746,7 @@ typedef struct re_pattern_buffer {
   int num_mem;                   /* used memory(...) num counted from 1 */
   int num_repeat;                /* OP_REPEAT/OP_REPEAT_NG id-counter */
   int num_null_check;            /* OP_NULL_CHECK_START/END id counter */
+  int num_comb_exp_check;        /* combination explosion check */
   int num_call;                  /* number of subexp call */
   unsigned int capture_history;  /* (?@...) flag (1-31) */
   unsigned int bt_mem_start;     /* need backtrack flag */
@@ -766,7 +778,13 @@ typedef struct re_pattern_buffer {
 
   /* regex_t link chain */
   struct re_pattern_buffer* chain;  /* escape compile-conflict */
-} regex_t;
+} OnigRegexType;
+
+typedef OnigRegexType*  OnigRegex;
+
+#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
+  typedef OnigRegexType  regex_t;
+#endif
 
 
 typedef struct {
@@ -788,19 +806,19 @@ void onig_set_warn_func P_((OnigWarnFunc f));
 ONIG_EXTERN
 void onig_set_verb_warn_func P_((OnigWarnFunc f));
 ONIG_EXTERN
-int onig_new P_((regex_t**, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 ONIG_EXTERN
-int onig_new_deluxe P_((regex_t** reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
 ONIG_EXTERN
-void onig_free P_((regex_t*));
+void onig_free P_((OnigRegex));
 ONIG_EXTERN
-int onig_recompile P_((regex_t*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
+int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
 ONIG_EXTERN
-int onig_recompile_deluxe P_((regex_t* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
+int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
 ONIG_EXTERN
-int onig_search P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
+int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
 ONIG_EXTERN
-int onig_match P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
+int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
 ONIG_EXTERN
 OnigRegion* onig_region_new P_((void));
 ONIG_EXTERN
@@ -816,29 +834,31 @@ int onig_region_resize P_((OnigRegion* region, int n));
 ONIG_EXTERN
 int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
 ONIG_EXTERN
-int onig_name_to_group_numbers P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
+int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
 ONIG_EXTERN
-int onig_name_to_backref_number P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
+int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
 ONIG_EXTERN
-int onig_foreach_name P_((regex_t* reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,regex_t*,void*), void* arg));
+int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
 ONIG_EXTERN
-int onig_number_of_names P_((regex_t* reg));
+int onig_number_of_names P_((OnigRegex reg));
 ONIG_EXTERN
-int onig_number_of_captures P_((regex_t* reg));
+int onig_number_of_captures P_((OnigRegex reg));
 ONIG_EXTERN
-int onig_number_of_capture_histories P_((regex_t* reg));
+int onig_number_of_capture_histories P_((OnigRegex reg));
 ONIG_EXTERN
 OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
 ONIG_EXTERN
 int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg));
 ONIG_EXTERN
-OnigEncoding onig_get_encoding P_((regex_t* reg));
+int onig_noname_group_capture_is_active P_((OnigRegex reg));
+ONIG_EXTERN
+OnigEncoding onig_get_encoding P_((OnigRegex reg));
 ONIG_EXTERN
-OnigOptionType onig_get_options P_((regex_t* reg));
+OnigOptionType onig_get_options P_((OnigRegex reg));
 ONIG_EXTERN
-OnigAmbigType onig_get_ambig_flag P_((regex_t* reg));
+OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg));
 ONIG_EXTERN
-OnigSyntaxType* onig_get_syntax P_((regex_t* reg));
+OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
 ONIG_EXTERN
 int onig_set_default_syntax P_((OnigSyntaxType* syntax));
 ONIG_EXTERN
@@ -864,7 +884,7 @@ int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint co
 ONIG_EXTERN
 void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from));
 ONIG_EXTERN
-OnigAmbigType onig_get_default_ambig_flag P_(());
+OnigAmbigType onig_get_default_ambig_flag P_((void));
 ONIG_EXTERN
 int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag));
 ONIG_EXTERN
index a2315fcec539b3bc8264f05aa971e15fb2f68e03..6a0976dee225aa66f8d4c954ec8b80d2313e18d4 100644 (file)
@@ -2,7 +2,7 @@
   regcomp.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,7 @@ OnigAmbigType OnigDefaultAmbigFlag =
    ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE);
 
 extern OnigAmbigType
-onig_get_default_ambig_flag()
+onig_get_default_ambig_flag(void)  /* returns OnigDefaultAmbigFlag; (void) declares that no arguments are taken */
 {
   return OnigDefaultAmbigFlag;
 }
@@ -47,10 +47,6 @@ onig_set_default_ambig_flag(OnigAmbigType ambig_flag)
 }
 
 
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
-static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
-#endif
-
 static UChar*
 k_strdup(UChar* s, UChar* end)
 {
@@ -186,6 +182,17 @@ add_opcode(regex_t* reg, int opcode)
   return 0;
 }
 
+#ifdef USE_COMBINATION_EXPLOSION_CHECK  /* helper exists only in combination-explosion-check builds */
+static int  /* returns 0 after handing the operand to BBUF_ADD */
+add_state_check_num(regex_t* reg, int num)  /* num: state-check number to store for reg */
+{
+  StateCheckNumType n = (StateCheckNumType )num;  /* convert to the operand's storage type */
+  /* NOTE(review): BBUF_ADD is a macro defined elsewhere; confirm whether it can return early on failure. */
+  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);  /* passes SIZE_STATE_CHECK_NUM bytes starting at &n */
+  return 0;
+}
+#endif
+
 static int
 add_rel_addr(regex_t* reg, int addr)
 {
@@ -528,6 +535,8 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
   add_length(reg, mbuf->used);
   return add_bytes(reg, mbuf->p, mbuf->used);
 #else
+  static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
+
   int r, pad_size;
   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
 
@@ -644,12 +653,12 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
   }
 
   p[id].lower = lower;
-  p[id].upper = upper;
+  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
   return 0;
 }
 
 static int
-compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info,
                           regex_t* reg)
 {
   int r;
@@ -673,7 +682,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
 #ifdef USE_SUBEXP_CALL
       reg->num_call > 0 ||
 #endif
-      IS_QUALIFIER_IN_REPEAT(qn)) {
+      IS_QUANTIFIER_IN_REPEAT(qn)) {
     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
   }
   else {
@@ -684,10 +693,257 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
   return r;
 }
 
-#define QUALIFIER_EXPAND_LIMIT_SIZE   50
+static int  /* 1 if qn is a greedy, unbounded repeat whose target is an N_ANYCHAR node; 0 otherwise */
+is_anychar_star_quantifier(QuantifierNode* qn)
+{
+  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+      NTYPE(qn->target) == N_ANYCHAR)  /* qn->lower is not examined here */
+    return 1;
+  else
+    return 0;
+}
+
+#define QUANTIFIER_EXPAND_LIMIT_SIZE   50
+#define CKN_ON   (ckn > 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
 
 static int
-compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)  /* size total (built from SIZE_* constants) for qn's code */
+{
+  int len, mod_tlen, cklen;
+  int ckn;
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);  /* size of one copy of the target */
+
+  if (tlen < 0) return tlen;  /* negative results are passed back unchanged */
+
+  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);  /* check number; forced to 0 when reg has none */
+
+  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);  /* operand bytes added only when ckn > 0 */
+
+  /* anychar repeat */
+  if (NTYPE(qn->target) == N_ANYCHAR) {
+    if (qn->greedy && infinite) {
+      if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)  /* cklen is 0 on this path */
+        return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+      else
+        return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+    }
+  }
+
+  if (empty_info != 0)
+    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);  /* target plus its null-check bracket */
+  else
+    mod_tlen = tlen;
+
+  if (infinite && qn->lower <= 1) {
+    if (qn->greedy) {
+      if (qn->lower == 1)
+       len = SIZE_OP_JUMP;  /* a leading jump is counted only when the lower bound is 1 */
+      else
+       len = 0;
+
+      len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
+    }
+    else {
+      if (qn->lower == 0)
+       len = SIZE_OP_JUMP;  /* non-greedy: the leading jump is counted only when the lower bound is 0 */
+      else
+       len = 0;
+
+      len += mod_tlen + SIZE_OP_PUSH + cklen;
+    }
+  }
+  else if (qn->upper == 0) {
+    if (qn->is_refered != 0) /* /(?<n>..){0}/ */
+      len = SIZE_OP_JUMP + tlen;  /* target is still counted, preceded by a jump */
+    else
+      len = 0;
+  }
+  else if (qn->upper == 1 && qn->greedy) {
+    if (qn->lower == 0) {
+      if (CKN_ON) {
+       len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+      }
+      else {
+       len = SIZE_OP_PUSH + tlen;
+      }
+    }
+    else {
+      len = tlen;  /* lower is nonzero with upper == 1: just the target */
+    }
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
+  }
+  else {
+    len = SIZE_OP_REPEAT_INC
+        + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+    if (CKN_ON)
+      len += SIZE_OP_STATE_CHECK;  /* trailing state-check instruction */
+  }
+
+  return len;
+}
+
+static int  /* returns 0 or the first nonzero result from an emit helper; a negative tlen is returned as-is */
+compile_quantifier_node(QuantifierNode* qn, regex_t* reg)  /* emits the code for quantifier qn into reg */
+{
+  int r, mod_tlen;
+  int ckn;
+  int infinite = IS_REPEAT_INFINITE(qn->upper);
+  int empty_info = qn->target_empty_info;
+  int tlen = compile_length_tree(qn->target, reg);  /* target size, used for the relative addresses below */
+
+  if (tlen < 0) return tlen;
+
+  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);  /* CKN_ON is (ckn > 0) */
+
+  if (is_anychar_star_quantifier(qn)) {
+    r = compile_tree_n_times(qn->target, qn->lower, reg);  /* target is emitted qn->lower times first */
+    if (r) return r;
+    if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
+      if (IS_MULTILINE(reg->options))
+       r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
+      else
+       r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+      if (r) return r;
+      if (CKN_ON) {  /* NOTE(review): cannot be true here; the enclosing branch requires !CKN_ON */
+       r = add_state_check_num(reg, ckn);
+       if (r) return r;
+      }
+
+      return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1);  /* one byte of the following string as operand */
+    }
+    else {
+      if (IS_MULTILINE(reg->options)) {
+       r = add_opcode(reg, (CKN_ON ?
+                              OP_STATE_CHECK_ANYCHAR_ML_STAR
+                            : OP_ANYCHAR_ML_STAR));
+      }
+      else {
+       r = add_opcode(reg, (CKN_ON ?
+                              OP_STATE_CHECK_ANYCHAR_STAR
+                            : OP_ANYCHAR_STAR));
+      }
+      if (r) return r;
+      if (CKN_ON)
+       r = add_state_check_num(reg, ckn);  /* the STATE_CHECK opcodes are followed by their check number */
+
+      return r;
+    }
+  }
+
+  if (empty_info != 0)
+    mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);  /* target plus its null-check bracket */
+  else
+    mod_tlen = tlen;
+
+  if (infinite && qn->lower <= 1) {
+    if (qn->greedy) {
+      if (qn->lower == 1) {
+       r = add_opcode_rel_addr(reg, OP_JUMP,
+                       (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));  /* offset is exactly the size of the push emitted next */
+       if (r) return r;
+      }
+
+      if (CKN_ON) {
+       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+       if (r) return r;
+       r = add_state_check_num(reg, ckn);
+       if (r) return r;
+       r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
+      }
+      else {
+       r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);  /* offset spans the target and the closing jump */
+      }
+      if (r) return r;
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      r = add_opcode_rel_addr(reg, OP_JUMP,
+             -(mod_tlen + (int )SIZE_OP_JUMP
+               + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));  /* negative offset: back over jump, target and push */
+    }
+    else {
+      if (qn->lower == 0) {
+       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);  /* offset spans the target emitted next */
+       if (r) return r;
+      }
+      r = compile_tree_empty_check(qn->target, reg, empty_info);
+      if (r) return r;
+      if (CKN_ON) {
+       r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
+       if (r) return r;
+       r = add_state_check_num(reg, ckn);
+       if (r) return r;
+       r = add_rel_addr(reg,
+                -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
+      }
+      else
+       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));  /* negative offset: back to the start of the target */
+    }
+  }
+  else if (qn->upper == 0) {
+    if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+      r = add_opcode_rel_addr(reg, OP_JUMP, tlen);  /* target is still emitted, with a jump of its own size in front */
+      if (r) return r;
+      r = compile_tree(qn->target, reg);
+    }
+    else
+      r = 0;  /* nothing is emitted */
+  }
+  else if (qn->upper == 1 && qn->greedy) {
+    if (qn->lower == 0) {
+      if (CKN_ON) {
+       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+       if (r) return r;
+       r = add_state_check_num(reg, ckn);
+       if (r) return r;
+       r = add_rel_addr(reg, tlen);
+      }
+      else {
+       r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
+      }
+      if (r) return r;
+    }
+
+    r = compile_tree(qn->target, reg);  /* reached for both lower == 0 and lower != 0 */
+  }
+  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
+    if (CKN_ON) {
+      r = add_opcode(reg, OP_STATE_CHECK_PUSH);
+      if (r) return r;
+      r = add_state_check_num(reg, ckn);
+      if (r) return r;
+      r = add_rel_addr(reg, SIZE_OP_JUMP);
+    }
+    else {
+      r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);  /* offset is the size of the jump emitted next */
+    }
+
+    if (r) return r;
+    r = add_opcode_rel_addr(reg, OP_JUMP, tlen);  /* offset is the size of the target emitted next */
+    if (r) return r;
+    r = compile_tree(qn->target, reg);
+  }
+  else {
+    r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);  /* every remaining lower/upper combination */
+    if (CKN_ON) {
+      if (r) return r;
+      r = add_opcode(reg, OP_STATE_CHECK);
+      if (r) return r;
+      r = add_state_check_num(reg, ckn);
+    }
+  }
+  return r;
+}
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+static int
+compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg)
 {
   int len, mod_tlen;
   int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -712,8 +968,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
     mod_tlen = tlen;
 
   if (infinite &&
-      (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
-    if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
       len = SIZE_OP_JUMP;
     }
     else {
@@ -736,7 +992,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
   }
   else if (!infinite && qn->greedy &&
            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
-                                      <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
+                                      <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
     len = tlen * qn->lower;
     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
   }
@@ -752,17 +1008,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg)
 }
 
 static int
-is_anychar_star_qualifier(QualifierNode* qn)
-{
-  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
-      NTYPE(qn->target) == N_ANYCHAR)
-    return 1;
-  else
-    return 0;
-}
-
-static int
-compile_qualifier_node(QualifierNode* qn, regex_t* reg)
+compile_quantifier_node(QuantifierNode* qn, regex_t* reg)
 {
   int i, r, mod_tlen;
   int infinite = IS_REPEAT_INFINITE(qn->upper);
@@ -771,7 +1017,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
 
   if (tlen < 0) return tlen;
 
-  if (is_anychar_star_qualifier(qn)) {
+  if (is_anychar_star_quantifier(qn)) {
     r = compile_tree_n_times(qn->target, qn->lower, reg);
     if (r) return r;
     if (IS_NOT_NULL(qn->next_head_exact)) {
@@ -796,8 +1042,8 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
     mod_tlen = tlen;
 
   if (infinite &&
-      (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
-    if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) {
+      (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
+    if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
       if (qn->greedy) {
        if (IS_NOT_NULL(qn->head_exact))
          r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
@@ -861,7 +1107,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
   }
   else if (!infinite && qn->greedy &&
            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
-                                  <= QUALIFIER_EXPAND_LIMIT_SIZE)) {
+                                  <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
     int n = qn->upper - qn->lower;
 
     r = compile_tree_n_times(qn->target, qn->lower, reg);
@@ -887,6 +1133,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg)
   }
   return r;
 }
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
 
 static int
 compile_length_option_node(EffectNode* node, regex_t* reg)
@@ -978,7 +1225,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg)
 
   case EFFECT_STOP_BACKTRACK:
     if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
-      QualifierNode* qn = &NQUALIFIER(node->target);
+      QuantifierNode* qn = &NQUANTIFIER(node->target);
       tlen = compile_length_tree(qn->target, reg);
       if (tlen < 0) return tlen;
 
@@ -1068,7 +1315,7 @@ compile_effect_node(EffectNode* node, regex_t* reg)
 
   case EFFECT_STOP_BACKTRACK:
     if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) {
-      QualifierNode* qn = &NQUALIFIER(node->target);
+      QuantifierNode* qn = &NQUANTIFIER(node->target);
       r = compile_tree_n_times(qn->target, qn->lower, reg);
       if (r) return r;
 
@@ -1268,8 +1515,15 @@ compile_length_tree(Node* node, regex_t* reg)
     {
       BackrefNode* br = &(NBACKREF(node));
 
+#ifdef USE_BACKREF_AT_LEVEL
+      if (IS_BACKREF_NEST_LEVEL(br)) {
+        r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+            SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
+      }
+      else
+#endif
       if (br->back_num == 1) {
-       r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3)
+       r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
             ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
       }
       else {
@@ -1284,8 +1538,8 @@ compile_length_tree(Node* node, regex_t* reg)
     break;
 #endif
 
-  case N_QUALIFIER:
-    r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg);
+  case N_QUANTIFIER:
+    r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg);
     break;
 
   case N_EFFECT:
@@ -1381,9 +1635,21 @@ compile_tree(Node* node, regex_t* reg)
 
   case N_BACKREF:
     {
-      int i;
       BackrefNode* br = &(NBACKREF(node));
 
+#ifdef USE_BACKREF_AT_LEVEL
+      if (IS_BACKREF_NEST_LEVEL(br)) {
+       r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
+       if (r) return r;
+       r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
+       if (r) return r;
+       r = add_length(reg, br->nest_level);
+       if (r) return r;
+
+       goto add_bacref_mems;
+      }
+      else
+#endif
       if (br->back_num == 1) {
        n = br->back_static[0];
        if (IS_IGNORECASE(reg->options)) {
@@ -1395,7 +1661,6 @@ compile_tree(Node* node, regex_t* reg)
          switch (n) {
          case 1:  r = add_opcode(reg, OP_BACKREF1); break;
          case 2:  r = add_opcode(reg, OP_BACKREF2); break;
-         case 3:  r = add_opcode(reg, OP_BACKREF3); break;
          default:
            r = add_opcode(reg, OP_BACKREFN);
            if (r) return r;
@@ -1405,17 +1670,21 @@ compile_tree(Node* node, regex_t* reg)
        }
       }
       else {
+       int i;
        int* p;
 
         if (IS_IGNORECASE(reg->options)) {
-          add_opcode(reg, OP_BACKREF_MULTI_IC);
+          r = add_opcode(reg, OP_BACKREF_MULTI_IC);
         }
         else {
-          add_opcode(reg, OP_BACKREF_MULTI);
+          r = add_opcode(reg, OP_BACKREF_MULTI);
         }
-
        if (r) return r;
-       add_length(reg, br->back_num);
+
+#ifdef USE_BACKREF_AT_LEVEL
+      add_bacref_mems:
+#endif
+       r = add_length(reg, br->back_num);
        if (r) return r;
        p = BACKREFS_P(br);
        for (i = br->back_num - 1; i >= 0; i--) {
@@ -1432,8 +1701,8 @@ compile_tree(Node* node, regex_t* reg)
     break;
 #endif
 
-  case N_QUALIFIER:
-    r = compile_qualifier_node(&(NQUALIFIER(node)), reg);
+  case N_QUANTIFIER:
+    r = compile_quantifier_node(&(NQUANTIFIER(node)), reg);
     break;
 
   case N_EFFECT:
@@ -1470,13 +1739,13 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
     break;
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
-      Node** ptarget = &(NQUALIFIER(node).target);
+      Node** ptarget = &(NQUANTIFIER(node).target);
       Node*  old = *ptarget;
       r = noname_disable_map(ptarget, map, counter);
-      if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) {
-       onig_reduce_nested_qualifier(node, *ptarget);
+      if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) {
+       onig_reduce_nested_quantifier(node, *ptarget);
       }
     }
     break;
@@ -1550,8 +1819,8 @@ renumber_by_map(Node* node, GroupNumRemap* map)
       r = renumber_by_map(NCONS(node).left, map);
     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
     break;
-  case N_QUALIFIER:
-    r = renumber_by_map(NQUALIFIER(node).target, map);
+  case N_QUANTIFIER:
+    r = renumber_by_map(NQUANTIFIER(node).target, map);
     break;
   case N_EFFECT:
     r = renumber_by_map(NEFFECT(node).target, map);
@@ -1580,8 +1849,8 @@ numbered_ref_check(Node* node)
       r = numbered_ref_check(NCONS(node).left);
     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
     break;
-  case N_QUALIFIER:
-    r = numbered_ref_check(NQUALIFIER(node).target);
+  case N_QUANTIFIER:
+    r = numbered_ref_check(NQUANTIFIER(node).target);
     break;
   case N_EFFECT:
     r = numbered_ref_check(NEFFECT(node).target);
@@ -1662,7 +1931,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
 
 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
 static int
-qualifiers_memory_node_info(Node* node)
+quantifiers_memory_node_info(Node* node)
 {
   int r = 0;
 
@@ -1672,7 +1941,7 @@ qualifiers_memory_node_info(Node* node)
     {
       int v;
       do {
-       v = qualifiers_memory_node_info(NCONS(node).left);
+       v = quantifiers_memory_node_info(NCONS(node).left);
        if (v > r) r = v;
       } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right));
     }
@@ -1684,15 +1953,15 @@ qualifiers_memory_node_info(Node* node)
       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
     }
     else
-      r = qualifiers_memory_node_info(NCALL(node).target);
+      r = quantifiers_memory_node_info(NCALL(node).target);
     break;
 #endif
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
       if (qn->upper != 0) {
-       r = qualifiers_memory_node_info(qn->target);
+       r = quantifiers_memory_node_info(qn->target);
       }
     }
     break;
@@ -1707,7 +1976,7 @@ qualifiers_memory_node_info(Node* node)
 
       case EFFECT_OPTION:
       case EFFECT_STOP_BACKTRACK:
-       r = qualifiers_memory_node_info(en->target);
+       r = quantifiers_memory_node_info(en->target);
        break;
       default:
        break;
@@ -1812,9 +2081,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
     *min = 1;
     break;
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
 
       if (qn->lower > 0) {
        r = get_min_match_length(qn->target, min, env);
@@ -1933,9 +2202,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
     break;
 #endif
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
 
       if (qn->upper != 0) {
        r = get_max_match_length(qn->target, max, env);
@@ -2040,9 +2309,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
     }
     break;
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
       if (qn->lower == qn->upper) {
        r = get_char_length_tree1(qn->target, reg, &tlen, level);
        if (r == 0)
@@ -2120,29 +2389,6 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
   return get_char_length_tree1(node, reg, len, 0);
 }
 
-extern int
-onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
-{
-  int found;
-
-  if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) {
-    if (IS_NULL(cc->mbuf)) {
-      found = 0;
-    }
-    else {
-      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
-    }
-  }
-  else {
-    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
-  }
-
-  if (IS_CCLASS_NOT(cc))
-    return !found;
-  else
-    return found;
-}
-
 /* x is not included y ==>  1 : 0 */
 static int
 is_not_included(Node* x, Node* y, regex_t* reg)
@@ -2375,9 +2621,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
     }
     break;
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
       if (qn->lower > 0) {
        if (IS_NOT_NULL(qn->head_exact))
          n = qn->head_exact;
@@ -2438,8 +2684,8 @@ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask)
     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
     break;
 
-  case N_QUALIFIER:
-    r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask,
+  case N_QUANTIFIER:
+    r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask,
                        anchor_mask);
     break;
 
@@ -2514,8 +2760,11 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
     }
     break;
 
-  case N_QUALIFIER:
-    r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head);
+  case N_QUANTIFIER:
+    r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head);
+    if (r == RECURSION_EXIST) {
+      if (NQUANTIFIER(node).lower == 0) r = 0;
+    }
     break;
 
   case N_ANCHOR:
@@ -2570,8 +2819,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
     break;
 
-  case N_QUALIFIER:
-    r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env);
+  case N_QUANTIFIER:
+    r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env);
     break;
 
   case N_ANCHOR:
@@ -2625,8 +2874,8 @@ subexp_recursive_check(Node* node)
     } while (IS_NOT_NULL(node = NCONS(node).right));
     break;
 
-  case N_QUALIFIER:
-    r = subexp_recursive_check(NQUALIFIER(node).target);
+  case N_QUANTIFIER:
+    r = subexp_recursive_check(NQUANTIFIER(node).target);
     break;
 
   case N_ANCHOR:
@@ -2690,11 +2939,11 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env)
     }
     break;
 
-  case N_QUALIFIER:
-    r = subexp_recursive_check_trav(NQUALIFIER(node).target, env);
-    if (NQUALIFIER(node).upper == 0) {
+  case N_QUANTIFIER:
+    r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env);
+    if (NQUANTIFIER(node).upper == 0) {
       if (r == FOUND_CALLED_NODE)
-       NQUALIFIER(node).is_refered = 1;
+       NQUANTIFIER(node).is_refered = 1;
     }
     break;
 
@@ -2757,8 +3006,8 @@ setup_subexp_call(Node* node, ScanEnv* env)
     } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right));
     break;
 
-  case N_QUALIFIER:
-    r = setup_subexp_call(NQUALIFIER(node).target, env);
+  case N_QUANTIFIER:
+    r = setup_subexp_call(NQUANTIFIER(node).target, env);
     break;
   case N_EFFECT:
     r = setup_subexp_call(NEFFECT(node).target, env);
@@ -2907,10 +3156,10 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
 
  retry:
   type = NTYPE(node);
-  if (type == N_QUALIFIER) {
-    QualifierNode* qn = &(NQUALIFIER(node));
+  if (type == N_QUANTIFIER) {
+    QuantifierNode* qn = &(NQUANTIFIER(node));
     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
-#ifdef USE_QUALIFIER_PEEK_NEXT
+#ifdef USE_QUANTIFIER_PEEK_NEXT
       qn->next_head_exact = get_head_value_node(next_node, 1, reg);
 #endif
       /* automatic posseivation a*b ==> (?>a*)b */
@@ -2943,15 +3192,55 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
   return 0;
 }
 
+
+static int  /* 0 on success, ONIGERR_MEMORY when a node cannot be allocated */
+divide_ambig_string_node_sub(regex_t* reg, int prev_ambig,
+                             UChar* prev_start, UChar* prev,
+                             UChar* end, Node*** tailp, Node** root)
+{ /* Wraps the bytes [prev_start, prev) in a string node and appends it to the list headed by *root. */
+  UChar *tmp, *wp;
+  Node *snode, *cell;
+  /* *tailp is the list's append slot; (Node** )0 means no cell has been added yet. */
+  if (prev_ambig != 0) {
+    tmp = prev_start;
+    wp  = prev_start;
+    while (tmp < prev) {  /* wp writes into the same span that tmp reads from */
+      wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
+                                     &tmp, end, wp);
+    }
+    snode = onig_node_new_str(prev_start, wp);
+    CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+    NSTRING_SET_AMBIG(snode);
+    if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);  /* written end differs from the original end */
+  }
+  else {
+    snode = onig_node_new_str(prev_start, prev);
+    CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+  }
+
+  cell = onig_node_new_list(snode, NULL);
+  if (IS_NULL(cell)) {
+    onig_node_free(snode);  /* snode is not linked anywhere yet; without this it would leak */
+    return ONIGERR_MEMORY;
+  }
+  if (*tailp == (Node** )0)
+    *root = cell;    /* first piece becomes the list head */
+  else
+    **tailp = cell;  /* later pieces are stored through the current append slot */
+  *tailp = &(NCONS(cell).right);  /* the next append goes after this cell */
+
+  return 0;
+}
+
 static int
 divide_ambig_string_node(Node* node, regex_t* reg)
 {
   StrNode* sn = &NSTRING(node);
   int ambig, prev_ambig;
   UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp;
-  Node *snode;
   Node *root = NULL_NODE;
   Node **tailp = (Node** )0;
+  int r;
 
   start = prev_start = p = sn->s;
   end  = sn->end;
@@ -2964,33 +3253,9 @@ divide_ambig_string_node(Node* node, regex_t* reg)
     if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc,
                                               reg->ambig_flag, &p, end))) {
 
-      if (prev_ambig != 0) {
-        tmp = prev_start;
-        wp  = prev_start;
-        while (tmp < prev) {
-          wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
-                                         &tmp, end, wp);
-        }
-        snode = onig_node_new_str(prev_start, wp);
-        CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-        NSTRING_SET_AMBIG(snode);
-        if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode);
-      }
-      else {
-        snode = onig_node_new_str(prev_start, prev);
-        CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-      }
-
-      if (tailp == (Node** )0) {
-        root = onig_node_new_list(snode, NULL);
-       CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
-       tailp = &(NCONS(root).right);
-      }
-      else {
-       *tailp = onig_node_new_list(snode, NULL);
-       CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
-       tailp = &(NCONS(*tailp).right);
-      }
+      r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev,
+                                       end, &tailp, &root);
+      if (r != 0) return r;
 
       prev_ambig = ambig;
       prev_start = prev;
@@ -3011,41 +3276,157 @@ divide_ambig_string_node(Node* node, regex_t* reg)
     }
   }
   else {
-    if (prev_ambig != 0) {
-      tmp = prev_start;
-      wp  = prev_start;
-      while (tmp < end) {
-        wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag,
-                                       &tmp, end, wp);
-      }
-      snode = onig_node_new_str(prev_start, wp);
-      CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-      NSTRING_SET_AMBIG(snode);
-      if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode);
+    r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end,
+                                     end, &tailp, &root);
+    if (r != 0) return r;
+
+    swap_node(node, root);
+    onig_node_str_clear(root); /* should be after swap! */
+    onig_node_free(root);      /* free original string node */
+  }
+
+  return 0;
+}
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define CEC_THRES_NUM_BIG_REPEAT         512
+#define CEC_INFINITE_NUM          0x7fffffff
+
+#define CEC_IN_INFINITE_REPEAT    (1<<0)
+#define CEC_IN_FINITE_REPEAT      (1<<1)
+#define CEC_CONT_BIG_REPEAT       (1<<2)
+
+static int  /* returns CEC_* state bits for node; the N_LIST/N_ALT walks stop on a negative child result */
+setup_comb_exp_check(Node* node, int state, ScanEnv* env)  /* state: CEC_* bits describing the enclosing repeats */
+{
+  int type;
+  int r = state;  /* node types not handled below return state unchanged */
+
+  type = NTYPE(node);
+  switch (type) {
+  case N_LIST:
+    {
+      Node* prev = NULL_NODE;  /* NOTE(review): written below but never read in this function */
+      do {
+       r = setup_comb_exp_check(NCONS(node).left, r, env);  /* each element receives the previous element's result as its state */
+       prev = NCONS(node).left;
+      } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right));
     }
-    else {
-      snode = onig_node_new_str(prev_start, end);
-      CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
+    break;
+
+  case N_ALT:
+    {
+      int ret;
+      do {
+       ret = setup_comb_exp_check(NCONS(node).left, state, env);  /* every alternative starts from the incoming state */
+       r |= ret;  /* branch results are OR-ed together */
+      } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right));
     }
+    break;
 
-    if (tailp == (Node** )0) {
-      root = onig_node_new_list(snode, NULL);
-      CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY);
-      tailp = &(NCONS(node).right);
+  case N_QUANTIFIER:
+    {
+      int child_state = state;  /* state handed down to the target */
+      int add_state = 0;  /* bits OR-ed into the result after the target is processed */
+      QuantifierNode* qn = &(NQUANTIFIER(node));
+      Node* target = qn->target;
+      int var_num;
+
+      if (! IS_REPEAT_INFINITE(qn->upper)) {
+       if (qn->upper > 1) {
+         /* {0,1}, {1,1} are allowed */
+         child_state |= CEC_IN_FINITE_REPEAT;
+
+         /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
+         if (env->backrefed_mem == 0) {  /* rewrite is attempted only when backrefed_mem is 0 */
+           if (NTYPE(qn->target) == N_EFFECT) {
+             EffectNode* en = &(NEFFECT(qn->target));
+             if (en->type == EFFECT_MEMORY) {
+               if (NTYPE(en->target) == N_QUANTIFIER) {
+                 QuantifierNode* q = &(NQUANTIFIER(en->target));
+                 if (IS_REPEAT_INFINITE(q->upper)
+                     && q->greedy == qn->greedy) {
+                   qn->upper = (qn->lower == 0 ? 1 : qn->lower);  /* upper becomes lower, or 1 when lower is 0 */
+                   if (qn->upper == 1)
+                     child_state = state;  /* drops the CEC_IN_FINITE_REPEAT bit set above */
+                 }
+               }
+             }
+           }
+         }
+       }
+      }
+
+      if (state & CEC_IN_FINITE_REPEAT) {
+       qn->comb_exp_check_num = -1;  /* not positive, so the compile step's CKN_ON (ckn > 0) test is false */
+      }
+      else {
+       if (IS_REPEAT_INFINITE(qn->upper)) {
+         var_num = CEC_INFINITE_NUM;
+         child_state |= CEC_IN_INFINITE_REPEAT;
+       }
+       else {
+         var_num = qn->upper - qn->lower;  /* spread between the two bounds */
+       }
+
+       if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
+         add_state |= CEC_CONT_BIG_REPEAT;
+
+       if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
+           ((state & CEC_CONT_BIG_REPEAT) != 0 &&
+            var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
+         if (qn->comb_exp_check_num == 0) {  /* a number is assigned only while the field is still 0 */
+           env->num_comb_exp_check++;
+           qn->comb_exp_check_num = env->num_comb_exp_check;
+           if (env->curr_max_regnum > env->comb_exp_max_regnum)
+             env->comb_exp_max_regnum = env->curr_max_regnum;
+         }
+       }
+      }
+
+      r = setup_comb_exp_check(target, child_state, env);
+      r |= add_state;
     }
-    else {
-      *tailp = onig_node_new_list(snode, NULL);
-      CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY);
-      tailp = &(NCONS(*tailp).right);
+    break;
+
+  case N_EFFECT:
+    {
+      EffectNode* en = &(NEFFECT(node));
+
+      switch (en->type) {
+      case EFFECT_MEMORY:
+       {
+         if (env->curr_max_regnum < en->regnum)
+           env->curr_max_regnum = en->regnum;  /* track the largest regnum seen so far */
+
+         r = setup_comb_exp_check(en->target, state, env);
+       }
+       break;
+
+      default:
+       r = setup_comb_exp_check(en->target, state, env);  /* other effect types just recurse into the target */
+       break;
+      }
     }
+    break;
 
-    swap_node(node, root);
-    onig_node_str_clear(root); /* should be after swap! */
-    onig_node_free(root);      /* free original string node */
+#ifdef USE_SUBEXP_CALL
+  case N_CALL:
+    if (IS_CALL_RECURSION(&(NCALL(node))))
+      env->has_recursion = 1;  /* recursive calls are only flagged; their target is not walked */
+    else
+      r = setup_comb_exp_check(NCALL(node).target, state, env);
+    break;
+#endif
+
+  default:
+    break;
   }
 
-  return 0;
+  return r;
 }
+#endif
 
 #define IN_ALT        (1<<0)
 #define IN_NOT        (1<<1)
@@ -3116,15 +3497,20 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
        if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
        BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
        BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
+#ifdef USE_BACKREF_AT_LEVEL
+       if (IS_BACKREF_NEST_LEVEL(br)) {
+         BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+       }
+#endif
        SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
       }
     }
     break;
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
       OnigDistance d;
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
       Node* target = qn->target;
 
       if ((state & IN_REPEAT) != 0) {
@@ -3137,7 +3523,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
        if (d == 0) {
          qn->target_empty_info = NQ_TARGET_IS_EMPTY;
 #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
-         r = qualifiers_memory_node_info(target);
+         r = quantifiers_memory_node_info(target);
          if (r < 0) break;
          if (r > 0) {
            qn->target_empty_info = r;
@@ -3179,15 +3565,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
              if (r) break;
            }
            onig_node_free(target);
-           break; /* break case N_QUALIFIER: */
+           break; /* break case N_QUANTIFIER: */
          }
        }
       }
 
 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
       if (qn->greedy && (qn->target_empty_info != 0)) {
-       if (NTYPE(target) == N_QUALIFIER) {
-         QualifierNode* tqn = &(NQUALIFIER(target));
+       if (NTYPE(target) == N_QUANTIFIER) {
+         QuantifierNode* tqn = &(NQUANTIFIER(target));
          if (IS_NOT_NULL(tqn->head_exact)) {
            qn->head_exact  = tqn->head_exact;
            tqn->head_exact = NULL;
@@ -3227,8 +3613,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
        {
          Node* target = en->target;
          r = setup_tree(target, reg, state, env);
-         if (NTYPE(target) == N_QUALIFIER) {
-           QualifierNode* tqn = &(NQUALIFIER(target));
+         if (NTYPE(target) == N_QUANTIFIER) {
+           QuantifierNode* tqn = &(NQUANTIFIER(target));
            if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
                tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
              int qtype = NTYPE(tqn->target);
@@ -3257,17 +3643,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
 /* allowed node types in look-behind */
 #define ALLOWED_TYPE_IN_LB  \
   ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \
-    N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL )
+    N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL )
 
 #define ALLOWED_EFFECT_IN_LB       ( EFFECT_MEMORY )
 #define ALLOWED_EFFECT_IN_LB_NOT   0
 
 #define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
 #define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF )
-       /* can't allow all anchors, because \G in look-behind through Search().
-          ex. /(?<=\G)zz/.match("azz") => success. */
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
 
       case ANCHOR_LOOK_BEHIND:
        {
@@ -3383,7 +3767,7 @@ typedef struct {
 static int
 map_position_value(OnigEncoding enc, int i)
 {
-  static short int ByteValTable[] = {
+  static const short int ByteValTable[] = {
      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
@@ -3408,7 +3792,7 @@ static int
 distance_value(MinMaxLen* mm)
 {
   /* 1000 / (min-max-dist + 1) */
-  static short int dist_vals[] = {
+  static const short int dist_vals[] = {
     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100, 
       91,   83,   77,   71,   67,   63,   59,   56,   53,   50, 
       48,   45,   43,   42,   40,   38,   37,   36,   34,   33, 
@@ -3604,9 +3988,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
 }
 
 static void
-concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
+concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
 {
-  int i, n;
+  int i, j, len;
+  UChar *p, *end;
   OptAncInfo tanc;
 
   if (! to->ignore_case && add->ignore_case) {
@@ -3615,11 +4000,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add)
     to->ignore_case = 1;
   }
 
-  for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++)
-    to->s[i] = add->s[n];
+  p = add->s;
+  end = p + add->len;
+  for (i = to->len; p < end; ) {
+    len = enc_len(enc, p);
+    if (i + len > OPT_EXACT_MAXLEN) break;
+    for (j = 0; j < len && p < end; j++)
+      to->s[i++] = *p++;
+  }
 
   to->len = i;
-  to->reach_end = (n == add->len ? add->reach_end : 0);
+  to->reach_end = (p == end ? add->reach_end : 0);
 
   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
   if (! to->reach_end) tanc.right_anchor = 0;
@@ -3634,15 +4025,10 @@ concat_opt_exact_info_str(OptExactInfo* to,
   UChar *p;
 
   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
-    if (raw) {
+    len = enc_len(enc, p);
+    if (i + len > OPT_EXACT_MAXLEN) break;
+    for (j = 0; j < len && p < end; j++)
       to->s[i++] = *p++;
-    }
-    else {
-      len = enc_len(enc, p);
-      if (i + len > OPT_EXACT_MAXLEN) break;
-      for (j = 0; j < len; j++)
-       to->s[i++] = *p++;
-    }
   }
 
   to->len = i;
@@ -3692,7 +4078,14 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
   v1 = now->len;
   v2 = alt->len;
 
-  if (v1 <= 2 && v2 <= 2) {
+  if (v2 == 0) {
+    return ;
+  }
+  else if (v1 == 0) {
+    copy_opt_exact_info(now, alt);
+    return ;
+  }
+  else if (v1 <= 2 && v2 <= 2) {
     /* ByteValTable[x] is big value --> low price */
     v2 = map_position_value(enc, now->s[0]);
     v1 = map_position_value(enc, alt->s[0]);
@@ -3711,7 +4104,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
 static void
 clear_opt_map_info(OptMapInfo* map)
 {
-  static OptMapInfo clean_info = {
+  static const OptMapInfo clean_info = {
     {0, 0}, {0, 0}, 0,
     {
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -3755,11 +4148,10 @@ static int
 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
                           OnigEncoding enc, OnigAmbigType ambig_flag)
 {
-  int i, j, n, len;
+  int i, n, len;
   UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN];
-  OnigCodePoint code, ccode;
-  OnigCompAmbigCodes* ccs;
-  OnigPairAmbigCodes* pccs;
+  OnigCodePoint code;
+  const OnigPairAmbigCodes* pccs;
   OnigAmbigType amb;
 
   add_char_opt_map_info(map, p[0], enc);
@@ -3776,21 +4168,6 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
         add_char_opt_map_info(map, buf[0], enc);
       }
     }
-
-    if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-      n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
-      for (i = 0; i < n; i++) {
-        if (ccs[i].code == code) {
-          for (j = 0; j < ccs[i].n; j++) {
-            ccode = ccs[i].items[j].code[0];
-            len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
-            if (len < 0) return len;
-            add_char_opt_map_info(map, buf[0], enc);
-          }
-          break;
-        }
-      }
-    }
   }
   return 0;
 }
@@ -3907,11 +4284,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
 
   if (add->exb.len > 0) {
     if (exb_reach) {
-      concat_opt_exact_info(&to->exb, &add->exb);
+      concat_opt_exact_info(&to->exb, &add->exb, enc);
       clear_opt_exact_info(&add->exb);
     }
     else if (exm_reach) {
-      concat_opt_exact_info(&to->exm, &add->exb);
+      concat_opt_exact_info(&to->exm, &add->exb, enc);
       clear_opt_exact_info(&add->exb);
     }
   }
@@ -4184,12 +4561,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
     break;
 #endif
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     {
       int i;
       OnigDistance min, max;
       NodeOptInfo nopt;
-      QualifierNode* qn = &(NQUALIFIER(node));
+      QuantifierNode* qn = &(NQUANTIFIER(node));
 
       r = optimize_node_left(qn->target, &nopt, env);
       if (r) break;
@@ -4197,8 +4574,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
        if (env->mmd.max == 0 &&
            NTYPE(qn->target) == N_ANYCHAR && qn->greedy) {
-         if (IS_POSIXLINE(env->options))
-           add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL);
+         if (IS_MULTILINE(env->options))
+           add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
          else
            add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
        }
@@ -4210,7 +4587,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
            if (nopt.exb.reach_end) {
              for (i = 2; i < qn->lower &&
                          ! is_full_opt_exact_info(&opt->exb); i++) {
-               concat_opt_exact_info(&opt->exb, &nopt.exb);
+               concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
              }
              if (i < qn->lower) {
                opt->exb.reach_end = 0;
@@ -4316,10 +4693,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
     CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
     reg->exact_end = reg->exact + e->len;
  
-    if (e->anc.left_anchor & ANCHOR_BEGIN_LINE)
-      allow_reverse = 1;
-    else
-      allow_reverse =
+    allow_reverse =
        ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
 
     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
@@ -4391,7 +4765,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
   if (r) return r;
 
   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
-        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL);
+        ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
 
   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
 
@@ -4446,6 +4820,38 @@ clear_optimize_info(regex_t* reg)
 
 #ifdef ONIG_DEBUG
 
+static void print_enc_string(FILE* fp, OnigEncoding enc,
+                            const UChar *s, const UChar *end)
+{
+  fprintf(fp, "\nPATTERN: /");
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    const UChar *p;
+    OnigCodePoint code;
+
+    p = s;
+    while (p < end) {
+      code = ONIGENC_MBC_TO_CODE(enc, p, end);
+      if (code >= 0x80) {
+       fprintf(fp, " 0x%04x ", (int )code);
+      }
+      else {
+       fputc((int )code, fp);
+      }
+
+      p += enc_len(enc, p);
+    }
+  }
+  else {
+    while (s < end) {
+      fputc((int )*s, fp);
+      s++;
+    }
+  }
+
+  fprintf(fp, "/\n");
+}
+
 static void
 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
 {
@@ -4503,7 +4909,7 @@ print_anchor(FILE* f, int anchor)
     q = 1;
     fprintf(f, "anychar-star");
   }
-  if (anchor & ANCHOR_ANYCHAR_STAR_PL) {
+  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
     if (q) fprintf(f, ", ");
     fprintf(f, "anychar-star-pl");
   }
@@ -4514,8 +4920,8 @@ print_anchor(FILE* f, int anchor)
 static void
 print_optimize_info(FILE* f, regex_t* reg)
 {
-  static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
-                       "EXACT_IC", "MAP" };
+  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
+                              "EXACT_IC", "MAP" };
 
   fprintf(f, "optimize: %s\n", on[reg->optimize]);
   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
@@ -4624,7 +5030,6 @@ onig_chain_reduce(regex_t* reg)
 {
   regex_t *head, *prev;
 
-  THREAD_ATOMIC_START;
   prev = reg;
   head = prev->chain;
   if (IS_NOT_NULL(head)) {
@@ -4636,7 +5041,6 @@ onig_chain_reduce(regex_t* reg)
     prev->chain = (regex_t* )NULL;
     REGEX_TRANSFER(reg, head);
   }
-  THREAD_ATOMIC_END;
 }
 
 #if 0
@@ -4739,6 +5143,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
 
   reg->state = ONIG_STATE_COMPILING;
 
+#ifdef ONIG_DEBUG
+  print_enc_string(stderr, reg->enc, pattern, pattern_end);
+#endif
+
   if (reg->alloc == 0) {
     init_size = (pattern_end - pattern) * 2;
     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
@@ -4753,6 +5161,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   reg->num_null_check     = 0;
   reg->repeat_range_alloc = 0;
   reg->repeat_range       = (OnigRepeatRange* )NULL;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  reg->num_comb_exp_check = 0;
+#endif
 
   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
   if (r != 0) goto err;
@@ -4806,6 +5217,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
     reg->bt_mem_end |= reg->capture_history;
   }
 
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  if (scan_env.backrefed_mem == 0
+#ifdef USE_SUBEXP_CALL
+      || scan_env.num_call == 0
+#endif
+      ) {
+    setup_comb_exp_check(root, 0, &scan_env);
+#ifdef USE_SUBEXP_CALL
+    if (scan_env.has_recursion != 0) {
+      scan_env.num_comb_exp_check = 0;
+    }
+    else
+#endif
+    if (scan_env.comb_exp_max_regnum > 0) {
+      int i;
+      for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
+       if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+         scan_env.num_comb_exp_check = 0;
+         break;
+       }
+      }
+    }
+  }
+
+  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
+#endif
+
   clear_optimize_info(reg);
 #ifndef ONIG_DONT_OPTIMIZE
   r = set_optimize_info_from_tree(root, reg, &scan_env);
@@ -4864,6 +5302,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
  err:
   if (IS_NOT_NULL(scan_env.error)) {
     if (IS_NOT_NULL(einfo)) {
+      einfo->enc     = scan_env.enc;
       einfo->par     = scan_env.error;
       einfo->par_end = scan_env.error_end;
     }
@@ -4875,6 +5314,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   return r;
 }
 
+#ifdef USE_RECOMPILE_API
 extern int
 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
            OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
@@ -4893,6 +5333,7 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
   }
   return 0;
 }
+#endif
 
 static int onig_inited = 0;
 
@@ -4906,6 +5347,11 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
   if (ONIGENC_IS_UNDEF(enc))
     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
 
+  if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
+      == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
+    return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
+  }
+
   *reg = (regex_t* )xmalloc(sizeof(regex_t));
   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
   (*reg)->state = ONIG_STATE_MODIFY;
@@ -4959,13 +5405,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
 }
 
 extern int
-onig_init()
+onig_init(void)
 {
   if (onig_inited != 0)
     return 0;
 
   onig_inited = 1;
 
+  THREAD_SYSTEM_INIT;
   THREAD_ATOMIC_START;
 
   onigenc_init();
@@ -4981,9 +5428,9 @@ onig_init()
 
 
 extern int
-onig_end()
+onig_end(void)
 {
-  extern int onig_free_shared_cclass_table();
+  extern int onig_free_shared_cclass_table(void);
 
   THREAD_ATOMIC_START;
 
@@ -4991,23 +5438,34 @@ onig_end()
   onig_print_statistics(stderr);
 #endif
 
-#ifdef USE_RECYCLE_NODE
-  onig_free_node_list();
-#endif
-
 #ifdef USE_SHARED_CCLASS_TABLE
   onig_free_shared_cclass_table();
 #endif
 
+#ifdef USE_RECYCLE_NODE
+  onig_free_node_list();
+#endif
+
   onig_inited = 0;
 
   THREAD_ATOMIC_END;
+  THREAD_SYSTEM_END;
   return 0;
 }
 
 
 #ifdef ONIG_DEBUG
 
+/* arguments type */
+#define ARG_SPECIAL     -1
+#define ARG_NON          0
+#define ARG_RELADDR      1
+#define ARG_ABSADDR      2
+#define ARG_LENGTH       3
+#define ARG_MEMNUM       4
+#define ARG_OPTION       5
+#define ARG_STATE_CHECK  6
+
 OnigOpInfoType OnigOpInfo[] = {
   { OP_FINISH,            "finish",          ARG_NON },
   { OP_END,               "end",             ARG_NON },
@@ -5038,62 +5496,66 @@ OnigOpInfoType OnigOpInfo[] = {
   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
-  { OP_WORD,              "word",            ARG_NON },
-  { OP_NOT_WORD,          "not-word",        ARG_NON },
-  { OP_WORD_SB,           "word-sb",         ARG_NON },
-  { OP_WORD_MB,           "word-mb",         ARG_NON },
-  { OP_WORD_BOUND,        "word-bound",      ARG_NON },
-  { OP_NOT_WORD_BOUND,    "not-word-bound",  ARG_NON },
-  { OP_WORD_BEGIN,        "word-begin",      ARG_NON },
-  { OP_WORD_END,          "word-end",        ARG_NON },
-  { OP_BEGIN_BUF,         "begin-buf",       ARG_NON },
-  { OP_END_BUF,           "end-buf",         ARG_NON },
-  { OP_BEGIN_LINE,        "begin-line",      ARG_NON },
-  { OP_END_LINE,          "end-line",        ARG_NON },
-  { OP_SEMI_END_BUF,      "semi-end-buf",    ARG_NON },
-  { OP_BEGIN_POSITION,    "begin-position",  ARG_NON },
-  { OP_BACKREF1,          "backref1",        ARG_NON },
-  { OP_BACKREF2,          "backref2",        ARG_NON },
-  { OP_BACKREF3,          "backref3",        ARG_NON },
-  { OP_BACKREFN,          "backrefn",        ARG_MEMNUM  },
-  { OP_BACKREFN_IC,       "backrefn-ic",     ARG_SPECIAL },
-  { OP_BACKREF_MULTI,     "backref_multi",   ARG_SPECIAL },
-  { OP_BACKREF_MULTI_IC,  "backref_multi-ic",ARG_SPECIAL },
-  { OP_MEMORY_START_PUSH, "mem-start-push",  ARG_MEMNUM  },
-  { OP_MEMORY_START,      "mem-start",       ARG_MEMNUM  },
-  { OP_MEMORY_END_PUSH,     "mem-end-push",     ARG_MEMNUM  },
-  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM  },
-  { OP_MEMORY_END,          "mem-end",          ARG_MEMNUM  },
-  { OP_MEMORY_END_REC,      "mem-end-rec",      ARG_MEMNUM  },
-  { OP_SET_OPTION_PUSH,   "set-option-push", ARG_OPTION  },
-  { OP_SET_OPTION,        "set-option",      ARG_OPTION  },
-  { OP_FAIL,              "fail",            ARG_NON },
-  { OP_JUMP,              "jump",            ARG_RELADDR },
-  { OP_PUSH,              "push",            ARG_RELADDR },
-  { OP_POP,               "pop",             ARG_NON },
-  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
-  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
-  { OP_REPEAT,            "repeat",          ARG_SPECIAL },
-  { OP_REPEAT_NG,         "repeat-ng",       ARG_SPECIAL },
-  { OP_REPEAT_INC,        "repeat-inc",      ARG_MEMNUM  },
-  { OP_REPEAT_INC_NG,     "repeat-inc-ng",   ARG_MEMNUM  },
-  { OP_REPEAT_INC_SG,     "repeat-inc-sg",    ARG_MEMNUM  },
-  { OP_REPEAT_INC_NG_SG,  "repeat-inc-ng-sg", ARG_MEMNUM  },
-  { OP_NULL_CHECK_START,  "null-check-start",ARG_MEMNUM  },
-  { OP_NULL_CHECK_END,    "null-check-end",  ARG_MEMNUM  },
-  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst",  ARG_MEMNUM  },
-  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push",  ARG_MEMNUM  },
-  { OP_PUSH_POS,          "push-pos",        ARG_NON },
-  { OP_POP_POS,           "pop-pos",         ARG_NON },
-  { OP_PUSH_POS_NOT,      "push-pos-not",    ARG_RELADDR },
-  { OP_FAIL_POS,          "fail-pos",        ARG_NON },
-  { OP_PUSH_STOP_BT,      "push-stop-bt",    ARG_NON },
-  { OP_POP_STOP_BT,       "pop-stop-bt",     ARG_NON },
-  { OP_LOOK_BEHIND,       "look-behind",     ARG_SPECIAL },
+  { OP_WORD,                "word",            ARG_NON },
+  { OP_NOT_WORD,            "not-word",        ARG_NON },
+  { OP_WORD_BOUND,          "word-bound",      ARG_NON },
+  { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
+  { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
+  { OP_WORD_END,            "word-end",        ARG_NON },
+  { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
+  { OP_END_BUF,             "end-buf",         ARG_NON },
+  { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
+  { OP_END_LINE,            "end-line",        ARG_NON },
+  { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
+  { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
+  { OP_BACKREF1,            "backref1",             ARG_NON },
+  { OP_BACKREF2,            "backref2",             ARG_NON },
+  { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
+  { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
+  { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
+  { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
+  { OP_BACKREF_AT_LEVEL,    "backref_at_level",     ARG_SPECIAL },
+  { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
+  { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
+  { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
+  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
+  { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
+  { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
+  { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
+  { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
+  { OP_FAIL,                "fail",                 ARG_NON },
+  { OP_JUMP,                "jump",                 ARG_RELADDR },
+  { OP_PUSH,                "push",                 ARG_RELADDR },
+  { OP_POP,                 "pop",                  ARG_NON },
+  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
+  { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
+  { OP_REPEAT,              "repeat",               ARG_SPECIAL },
+  { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
+  { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
+  { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
+  { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
+  { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
+  { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
+  { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
+  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
+  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
+  { OP_PUSH_POS,             "push-pos",             ARG_NON },
+  { OP_POP_POS,              "pop-pos",              ARG_NON },
+  { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
+  { OP_FAIL_POS,             "fail-pos",             ARG_NON },
+  { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
+  { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
+  { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
-  { OP_CALL,                 "call",            ARG_ABSADDR },
-  { OP_RETURN,               "return",          ARG_NON },
+  { OP_CALL,                 "call",                 ARG_ABSADDR },
+  { OP_RETURN,               "return",               ARG_NON },
+  { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
+  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
+  { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
+  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
+  { OP_STATE_CHECK_ANYCHAR_ML_STAR,
+    "state-check-anychar-ml*", ARG_STATE_CHECK },
   { -1, "", ARG_NON }
 };
 
@@ -5152,6 +5614,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
   RelAddrType addr;
   LengthType len;
   MemNumType mem;
+  StateCheckNumType scn;
   OnigCodePoint code;
   UChar *q;
 
@@ -5186,6 +5649,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
        fprintf(f, ":%d", option);
       }
       break;
+
+    case ARG_STATE_CHECK:
+      scn = *((StateCheckNumType* )bp);
+      bp += SIZE_STATE_CHECK_NUM;
+      fprintf(f, ":%d", scn);
+      break;
     }
   }
   else {
@@ -5312,6 +5781,26 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
       }
       break;
 
+    case OP_BACKREF_AT_LEVEL:
+      {
+       OnigOptionType option;
+       LengthType level;
+
+       GET_OPTION_INC(option, bp);
+       fprintf(f, ":%d", option);
+       GET_LENGTH_INC(level, bp);
+       fprintf(f, ":%d", level);
+
+       fputs(" ", f);
+       GET_LENGTH_INC(len, bp);
+       for (i = 0; i < len; i++) {
+         GET_MEMNUM_INC(mem, bp);
+         if (i > 0) fputs(", ", f);
+         fprintf(f, "%d", mem);
+       }
+      }
+      break;
+
     case OP_REPEAT:
     case OP_REPEAT_NG:
       {
@@ -5343,6 +5832,15 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
       fprintf(f, ":%d:(%d)", len, addr);
       break;
 
+    case OP_STATE_CHECK_PUSH:
+    case OP_STATE_CHECK_PUSH_OR_JUMP:
+      scn = *((StateCheckNumType* )bp);
+      bp += SIZE_STATE_CHECK_NUM;
+      addr = *((RelAddrType* )bp);
+      bp += SIZE_RELADDR;
+      fprintf(f, ":%d:(%d)", scn, addr);
+      break;
+
     default:
       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
              *--bp);
@@ -5497,11 +5995,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
     break;
 #endif
 
-  case N_QUALIFIER:
-    fprintf(f, "<qualifier:%x>{%d,%d}%s\n", (int )node,
-           NQUALIFIER(node).lower, NQUALIFIER(node).upper,
-           (NQUALIFIER(node).greedy ? "" : "?"));
-    print_indent_tree(f, NQUALIFIER(node).target, indent + add);
+  case N_QUANTIFIER:
+    fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
+           NQUANTIFIER(node).lower, NQUANTIFIER(node).upper,
+           (NQUANTIFIER(node).greedy ? "" : "?"));
+    print_indent_tree(f, NQUANTIFIER(node).target, indent + add);
     break;
 
   case N_EFFECT:
@@ -5530,7 +6028,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
     break;
   }
 
-  if (type != N_LIST && type != N_ALT && type != N_QUALIFIER &&
+  if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER &&
       type != N_EFFECT)
     fprintf(f, "\n");
   fflush(f);
index a767ca60b6af18f7f74ebcbef3fcf790e3ab771e..958917e122686936f1f6f31efe186eb8e0ff6127 100644 (file)
@@ -2,7 +2,7 @@
   regenc.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
 
 extern int
-onigenc_init()
+onigenc_init(void)
 {
   return 0;
 }
 
 extern OnigEncoding
-onigenc_get_default_encoding()
+onigenc_get_default_encoding(void)
 {
   return OnigEncDefaultCharEncoding;
 }
@@ -175,7 +175,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
 
 #define USE_APPLICATION_TO_LOWER_CASE_TABLE
 
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -251,7 +251,7 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = {
 #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */
 
 #ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncAsciiToUpperCaseTable[256] = {
+const UChar OnigEncAsciiToUpperCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -287,7 +287,7 @@ UChar OnigEncAsciiToUpperCaseTable[256] = {
 };
 #endif
 
-unsigned short OnigEncAsciiCtypeTable[256] = {
+const unsigned short OnigEncAsciiCtypeTable[256] = {
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
   0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
   0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -323,7 +323,7 @@ unsigned short OnigEncAsciiCtypeTable[256] = {
   0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
 };
 
-UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -359,7 +359,7 @@ UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
 };
 
 #ifdef USE_UPPER_CASE_TABLE
-UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
+const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
   '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
   '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
   '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -417,7 +417,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC
   return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
 }
 
-OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
+const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
   { 0x41, 0x61 },
   { 0x42, 0x62 },
   { 0x43, 0x63 },
@@ -475,7 +475,7 @@ OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = {
 
 extern int
 onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                       OnigPairAmbigCodes** ccs)
+                                       const OnigPairAmbigCodes** ccs)
 {
   if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
     *ccs = OnigAsciiPairAmbigCodes;
@@ -488,16 +488,16 @@ onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag,
 
 extern int
 onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag,
-                                         OnigCompAmbigCodes** ccs)
+                                         const OnigCompAmbigCodes** ccs)
 {
   return 0;
 }
 
 extern int
 onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
-                                            OnigPairAmbigCodes** ccs)
+                                            const OnigPairAmbigCodes** ccs)
 {
-  static OnigPairAmbigCodes cc[] = {
+  static const OnigPairAmbigCodes cc[] = {
     { 0xc0, 0xe0 },
     { 0xc1, 0xe1 },
     { 0xc2, 0xe2 },
@@ -577,9 +577,9 @@ onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag,
 
 extern int
 onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
-                                           OnigCompAmbigCodes** ccs)
+                                           const OnigCompAmbigCodes** ccs)
 {
-  static OnigCompAmbigCodes folds[] = {
+  static const OnigCompAmbigCodes folds[] = {
     { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } }
   };
 
@@ -593,7 +593,7 @@ onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag,
 
 extern int
 onigenc_not_support_get_ctype_code_range(int ctype,
-                             OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+                             const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
 {
   return ONIG_NO_SUPPORT_CONFIG;
 }
@@ -830,10 +830,10 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
   if ((code & 0xff000000) != 0) {
     *p++ = (UChar )((code >> 24) & 0xff);
   }
-  if ((code & 0xff0000) != 0) {
+  if ((code & 0xff0000) != 0 || p != buf) {
     *p++ = (UChar )((code >> 16) & 0xff);
   }
-  if ((code & 0xff00) != 0) {
+  if ((code & 0xff00) != 0 || p != buf) {
     *p++ = (UChar )((code >> 8) & 0xff);
   }
   *p++ = (UChar )(code & 0xff);
@@ -849,40 +849,32 @@ extern int
 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
+    }
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 extern int
 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
                          unsigned int ctype)
 {
-  if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-    if (code < 128)
-      return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-    else
+  if (code < 128)
+    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+  else {
+    if ((ctype & (ONIGENC_CTYPE_WORD |
+                  ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
       return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
-
-    ctype &= ~ONIGENC_CTYPE_WORD;
-    if (ctype == 0) return FALSE;
+    }
   }
 
-  if (code < 128)
-    return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
-  else
-    return FALSE;
+  return FALSE;
 }
 
 extern int
index 510455146ef2d447ee3392a5bc83eaabff14c7e6..58ee3e7f22f0b3ae605adca7548ab9b4f8ef88c7 100644 (file)
@@ -4,7 +4,7 @@
   regenc.h -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #else  /* ONIG_RUBY_M17N */
 
 #define USE_UNICODE_FULL_RANGE_CTYPE
+/* the following must not be used together with USE_CRNL_AS_LINE_TERMINATOR */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */  /* see Unicode.org UTS #18 */
 
 #define ONIG_ENCODING_INIT_DEFAULT           ONIG_ENCODING_ASCII
 
 /* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs));
-ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs));
+ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
 ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end));
 
 /* methods for single byte encoding */
@@ -105,7 +107,7 @@ ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** code
 
 /* in enc/unicode.c */
 ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
-ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[]));
+ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]));
 
 
 #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
@@ -115,10 +117,10 @@ ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoin
 #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \
   ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0)
 
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[];
-ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
-ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
+ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
+ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[];
+ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[];
 
 #endif /* is not ONIG_RUBY_M17N */
 
@@ -133,7 +135,7 @@ extern int  onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
 ONIG_EXTERN OnigEncoding  OnigEncDefaultCharEncoding;
 ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable;
 ONIG_EXTERN const UChar  OnigEncAsciiToUpperCaseTable[];
-ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[];
+ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
 
 #define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c]
 #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c]
index 413b985c351f5170ee957abe6e2323f4e9115e1b..d6ec91856d1f4cb31417f83193237b07482371d4 100644 (file)
@@ -2,7 +2,7 @@
   regerror.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #define va_init_list(a,b) va_start(a)
 #endif
 
-extern char*
+extern UChar*
 onig_error_code_to_format(int code)
 {
   char *p;
 
-  if (code >= 0) return (char* )0;
+  if (code >= 0) return (UChar* )0;
 
   switch (code) {
   case ONIG_MISMATCH:
@@ -170,6 +170,8 @@ onig_error_code_to_format(int code)
     p = "invalid character property name {%n}"; break;
   case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION:
     p = "not supported encoding combination"; break;
+  case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
+    p = "invalid combination of options"; break;
   case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
     p = "over thread pass limit count"; break;
 
@@ -177,7 +179,49 @@ onig_error_code_to_format(int code)
     p = "undefined error code"; break;
   }
 
-  return p;
+  return (UChar* )p;
+}
+
+
+/* Copy error parameter [s, end) into buf as ASCII (not NUL-terminated) and
+ * return the byte count (<= buf_size).  When the encoding's minimum character
+ * length is > 1, codes >= 0x80 become a 4-character octal escape of their low
+ * 8 bits.  *is_over is set to 1 if the input did not fit, else 0. */
+static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
+                   UChar buf[], int buf_size, int *is_over)
+{
+  int len;
+  UChar *p;
+  OnigCodePoint code;
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    p = s;
+    len = 0;
+    while (p < end) {
+      code = ONIGENC_MBC_TO_CODE(enc, p, end);
+      if (code >= 0x80) {
+       /* sprintf() stores 4 characters plus a NUL, so 5 bytes must be free */
+       if (len + 5 > buf_size) break;
+       sprintf((char* )(&(buf[len])), "\\%03o",
+               (unsigned int)(code & 0377));
+       len += 4;  /* count the escape only; the NUL gets overwritten */
+      }
+      else {
+       buf[len++] = (UChar )code;
+      }
+
+      p += enc_len(enc, p);
+      if (len >= buf_size) break;
+    }
+    *is_over = ((p < end) ? 1 : 0);
+  }
+  else {
+    len = MIN((end - s), buf_size);
+    xmemcpy(buf, s, (size_t )len);
+    *is_over = ((buf_size < (end - s)) ? 1 : 0);
+  }
+
+  return len;
 }
 
 
@@ -196,7 +240,8 @@ onig_error_code_to_str(s, code, va_alist)
 {
   UChar *p, *q;
   OnigErrorInfo* einfo;
-  int len;
+  int len, is_over;
+  UChar parbuf[MAX_ERROR_PAR_LEN];
   va_list vargs;
 
   va_init_list(vargs, code);
@@ -210,23 +255,20 @@ onig_error_code_to_str(s, code, va_alist)
   case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
   case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
     einfo = va_arg(vargs, OnigErrorInfo*);
-    len = einfo->par_end - einfo->par;
+    len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
+                  parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
     q = onig_error_code_to_format(code);
     p = s;
     while (*q != '\0') {
       if (*q == '%') {
        q++;
        if (*q == 'n') { /* '%n': name */
-         if (len > MAX_ERROR_PAR_LEN) {
-           xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3);
-           p += (MAX_ERROR_PAR_LEN - 3);
+         xmemcpy(p, parbuf, len);
+         p += len;
+         if (is_over != 0) {
            xmemcpy(p, "...", 3);
            p += 3;
          }
-         else {
-           xmemcpy(p, einfo->par, len);
-           p += len;
-         }
          q++;
        }
        else
@@ -256,39 +298,36 @@ onig_error_code_to_str(s, code, va_alist)
 
 void
 #ifdef HAVE_STDARG_PROTOTYPES
-onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc,
-                           char* pat, char* pat_end, char *fmt, ...)
+onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
+                           UChar* pat, UChar* pat_end, const UChar *fmt, ...)
 #else
 onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
-    char buf[];
+    UChar buf[];
     int bufsize;
     OnigEncoding enc;
-    char* pat;
-    char* pat_end;
-    const char *fmt;
+    UChar* pat;
+    UChar* pat_end;
+    const UChar *fmt;
     va_dcl
 #endif
 {
   int n, need, len;
   UChar *p, *s, *bp;
-  char bs[6];
+  UChar bs[6];
   va_list args;
 
   va_init_list(args, fmt);
-  n = vsnprintf(buf, bufsize, fmt, args);
-  if (n < 0 || n >= bufsize) {
-    n = bufsize - 1;
-  }
+  n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args);
   va_end(args);
 
   need = (pat_end - pat) * 4 + 4;
 
   if (n + need < bufsize) {
-    strcat(buf, ": /");
+    strcat((char* )buf, ": /");
     s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
 
     p = pat;
-    while (p < (UChar* )pat_end) {
+    while (p < pat_end) {
       if (*p == MC_ESC(enc)) {
        *s++ = *p++;
        len = enc_len(enc, p);
@@ -307,7 +346,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
           int blen;
 
           while (len-- > 0) {
-            sprintf(bs, "\\%03o", *p++ & 0377);
+            sprintf((char* )bs, "\\%03o", *p++ & 0377);
             blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
             bp = bs;
             while (blen-- > 0) *s++ = *bp++;
@@ -316,7 +355,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
       }
       else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
               !ONIGENC_IS_CODE_SPACE(enc, *p)) {
-       sprintf(bs, "\\%03o", *p++ & 0377);
+       sprintf((char* )bs, "\\%03o", *p++ & 0377);
        len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
         bp = bs;
        while (len-- > 0) *s++ = *bp++;
index 25d97773fbe5aa134fc7938acee0288cafc19754..918aa67aa88bd6565c9daa3e44ee20a28f2d0843 100644 (file)
@@ -2,7 +2,7 @@
   regexec.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 #include "regint.h"
 
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
+   ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+#endif
+
 #ifdef USE_CAPTURE_HISTORY
 static void history_tree_free(OnigCaptureTreeNode* node);
 
@@ -70,7 +76,7 @@ history_root_free(OnigRegion* r)
 }
 
 static OnigCaptureTreeNode*
-history_node_new()
+history_node_new(void)
 {
   OnigCaptureTreeNode* node;
 
@@ -227,7 +233,7 @@ onig_region_init(OnigRegion* region)
 }
 
 extern OnigRegion*
-onig_region_new()
+onig_region_new(void)
 {
   OnigRegion* r;
 
@@ -300,6 +306,9 @@ typedef struct _StackType {
       UChar *pcode;      /* byte code position */
       UChar *pstr;       /* string position */
       UChar *pstr_prev;  /* previous char position of pstr */
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+      unsigned int state_check;
+#endif
     } state;
     struct {
       int   count;       /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
@@ -333,28 +342,28 @@ typedef struct _StackType {
 /* stack type */
 /* used by normal-POP */
 #define STK_ALT                    0x0001
-#define STK_LOOK_BEHIND_NOT        0x0003
-#define STK_POS_NOT                0x0005
-/* avoided by normal-POP, but value should be small */
-#define STK_NULL_CHECK_START       0x0100
+#define STK_LOOK_BEHIND_NOT        0x0002
+#define STK_POS_NOT                0x0003
 /* handled by normal-POP */
-#define STK_MEM_START              0x0200
-#define STK_MEM_END                0x0300
-#define STK_REPEAT_INC             0x0400
+#define STK_MEM_START              0x0100
+#define STK_MEM_END                0x8200
+#define STK_REPEAT_INC             0x0300
+#define STK_STATE_CHECK_MARK       0x1000
 /* avoided by normal-POP */
+#define STK_NULL_CHECK_START       0x3000
+#define STK_NULL_CHECK_END         0x5000  /* for recursive call */
+#define STK_MEM_END_MARK           0x8400
 #define STK_POS                    0x0500  /* used when POP-POS */
 #define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
 #define STK_REPEAT                 0x0700
 #define STK_CALL_FRAME             0x0800
 #define STK_RETURN                 0x0900
-#define STK_MEM_END_MARK           0x0a00
-#define STK_VOID                   0x0b00  /* for fill a blank */
-#define STK_NULL_CHECK_END         0x0c00  /* for recursive call */
+#define STK_VOID                   0x0a00  /* for fill a blank */
 
 /* stack type check mask */
-#define STK_MASK_POP_USED     0x00ff
-#define IS_TO_VOID_TARGET(stk) \
-     (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START)
+#define STK_MASK_POP_USED          0x00ff
+#define STK_MASK_TO_VOID_TARGET    0x10ff
+#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
 
 typedef struct {
   void* stack_p;
@@ -362,16 +371,72 @@ typedef struct {
   OnigOptionType options;
   OnigRegion*    region;
   const UChar* start;   /* search start position (for \G: BEGIN_POSITION) */
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+  int    best_len;      /* for ONIG_OPTION_FIND_LONGEST */
+  UChar* best_s;
+#endif
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  void* state_check_buff;
+  int   state_check_buff_size;
+#endif
 } MatchArg;
 
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+  (msa).stack_p  = (void* )0;\
+  (msa).options  = (arg_option);\
+  (msa).region   = (arg_region);\
+  (msa).start    = (arg_start);\
+  (msa).best_len = ONIG_MISMATCH;\
+} while (0)
+#else
 #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
-  (msa).stack_p = (void* )0;\
-  (msa).options = (arg_option);\
-  (msa).region  = (arg_region);\
-  (msa).start   = (arg_start);\
+  (msa).stack_p  = (void* )0;\
+  (msa).options  = (arg_option);\
+  (msa).region   = (arg_region);\
+  (msa).start    = (arg_start);\
+} while (0)
+#endif
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+
+#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16
+
+#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {    \
+  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
+    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
+    offset = ((offset) * (state_num)) >> 3;\
+    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
+      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \
+        (msa).state_check_buff = (void* )xmalloc(size);\
+      else \
+        (msa).state_check_buff = (void* )xalloca(size);\
+      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
+              (size_t )(size - (offset))); \
+      (msa).state_check_buff_size = size;\
+    }\
+    else {\
+      (msa).state_check_buff = (void* )0;\
+      (msa).state_check_buff_size = 0;\
+    }\
+  }\
+  else {\
+    (msa).state_check_buff = (void* )0;\
+    (msa).state_check_buff_size = 0;\
+  }\
 } while (0)
 
-#define MATCH_ARG_FREE(msa)   if ((msa).stack_p) xfree((msa).stack_p)
+#define MATCH_ARG_FREE(msa) do { /* free stack_p and a heap state-check buffer */\
+  if ((msa).stack_p) xfree((msa).stack_p);\
+  /* buffers below the threshold size came from xalloca(): never xfree() them */\
+  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
+    if ((msa).state_check_buff) xfree((msa).state_check_buff); }\
+} while (0)
+#else
+#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num)
+#define MATCH_ARG_FREE(msa)  if ((msa).stack_p) xfree((msa).stack_p)
+#endif
+
 
 
 #define STACK_INIT(alloc_addr, ptr_num, stack_num)  do {\
@@ -465,27 +530,89 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 #define STACK_AT(index)        (stk_base + (index))
 #define GET_STACK_INDEX(stk)   ((stk) - stk_base)
 
+#define STACK_PUSH_TYPE(stack_type) do {\
+  STACK_ENSURE(1);\
+  stk->type = (stack_type);\
+  STACK_INC;\
+} while(0)
+
+#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define STATE_CHECK_POS(s,snum) \
+  (((s) - str) * num_comb_exp_check + ((snum) - 1))
+#define STATE_CHECK_VAL(v,snum) do {\
+  if (state_check_buff != NULL) {\
+    int x = STATE_CHECK_POS(s,snum);\
+    (v) = state_check_buff[x/8] & (1<<(x%8));\
+  }\
+  else (v) = 0;\
+} while(0)
+
+
+#define ELSE_IF_STATE_CHECK_MARK(stk) \
+  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
+    int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
+    state_check_buff[x/8] |= (1<<(x%8));                               \
+  }
+
 #define STACK_PUSH(stack_type,pat,s,sprev) do {\
   STACK_ENSURE(1);\
   stk->type = (stack_type);\
   stk->u.state.pcode     = (pat);\
   stk->u.state.pstr      = (s);\
   stk->u.state.pstr_prev = (sprev);\
+  stk->u.state.state_check = 0;\
   STACK_INC;\
 } while(0)
 
 #define STACK_PUSH_ENSURED(stack_type,pat) do {\
   stk->type = (stack_type);\
   stk->u.state.pcode = (pat);\
+  stk->u.state.state_check = 0;\
   STACK_INC;\
 } while(0)
 
-#define STACK_PUSH_TYPE(stack_type) do {\
+#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\
+  STACK_ENSURE(1);\
+  stk->type = STK_ALT;\
+  stk->u.state.pcode     = (pat);\
+  stk->u.state.pstr      = (s);\
+  stk->u.state.pstr_prev = (sprev);\
+  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
+  STACK_INC;\
+} while(0)
+
+#define STACK_PUSH_STATE_CHECK(s,snum) do {\
+  if (state_check_buff != NULL) {\
+    STACK_ENSURE(1);\
+    stk->type = STK_STATE_CHECK_MARK;\
+    stk->u.state.pstr = (s);\
+    stk->u.state.state_check = (snum);\
+    STACK_INC;\
+  }\
+} while(0)
+
+#else /* USE_COMBINATION_EXPLOSION_CHECK */
+
+#define ELSE_IF_STATE_CHECK_MARK(stk)
+
+#define STACK_PUSH(stack_type,pat,s,sprev) do {\
   STACK_ENSURE(1);\
   stk->type = (stack_type);\
+  stk->u.state.pcode     = (pat);\
+  stk->u.state.pstr      = (s);\
+  stk->u.state.pstr_prev = (sprev);\
   STACK_INC;\
 } while(0)
 
+#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+  stk->type = (stack_type);\
+  stk->u.state.pcode = (pat);\
+  STACK_INC;\
+} while(0)
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
 #define STACK_PUSH_ALT(pat,s,sprev)     STACK_PUSH(STK_ALT,pat,s,sprev)
 #define STACK_PUSH_POS(s,sprev)         STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev)
 #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev)
@@ -544,7 +671,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   k = stk;\
   while (k > stk_base) {\
     k--;\
-    if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \
+    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
       && k->u.mem.num == (mnum)) {\
       level++;\
     }\
@@ -603,15 +730,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 
 
 #ifdef ONIG_DEBUG
-#define STACK_BASE_CHECK(p) \
-  if ((p) < stk_base)  goto stack_error;
+#define STACK_BASE_CHECK(p, at) \
+  if ((p) < stk_base) {\
+    fprintf(stderr, "at %s\n", at);\
+    goto stack_error;\
+  }
 #else
-#define STACK_BASE_CHECK(p)
+#define STACK_BASE_CHECK(p, at)
 #endif
 
 #define STACK_POP_ONE do {\
   stk--;\
-  STACK_BASE_CHECK(stk); \
+  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
 } while(0)
 
 #define STACK_POP  do {\
@@ -619,25 +749,27 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   case STACK_POP_LEVEL_FREE:\
     while (1) {\
       stk--;\
-      STACK_BASE_CHECK(stk); \
+      STACK_BASE_CHECK(stk, "STACK_POP"); \
       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
+      ELSE_IF_STATE_CHECK_MARK(stk);\
     }\
     break;\
   case STACK_POP_LEVEL_MEM_START:\
     while (1) {\
       stk--;\
-      STACK_BASE_CHECK(stk); \
+      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
       else if (stk->type == STK_MEM_START) {\
         mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
         mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
       }\
+      ELSE_IF_STATE_CHECK_MARK(stk);\
     }\
     break;\
   default:\
     while (1) {\
       stk--;\
-      STACK_BASE_CHECK(stk); \
+      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
       if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
       else if (stk->type == STK_MEM_START) {\
         mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@@ -650,6 +782,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
         mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
         mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
       }\
+      ELSE_IF_STATE_CHECK_MARK(stk);\
     }\
     break;\
   }\
@@ -658,7 +791,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
 #define STACK_POP_TIL_POS_NOT  do {\
   while (1) {\
     stk--;\
-    STACK_BASE_CHECK(stk); \
+    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
     if (stk->type == STK_POS_NOT) break;\
     else if (stk->type == STK_MEM_START) {\
       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@@ -671,13 +804,14 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
       mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
     }\
+    ELSE_IF_STATE_CHECK_MARK(stk);\
   }\
 } while(0)
 
 #define STACK_POP_TIL_LOOK_BEHIND_NOT  do {\
   while (1) {\
     stk--;\
-    STACK_BASE_CHECK(stk); \
+    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
     if (stk->type == STK_LOOK_BEHIND_NOT) break;\
     else if (stk->type == STK_MEM_START) {\
       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
@@ -690,6 +824,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
       mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
       mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
     }\
+    ELSE_IF_STATE_CHECK_MARK(stk);\
   }\
 } while(0)
 
@@ -697,7 +832,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_POS_END"); \
     if (IS_TO_VOID_TARGET(k)) {\
       k->type = STK_VOID;\
     }\
@@ -712,7 +847,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   StackType *k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
     if (IS_TO_VOID_TARGET(k)) {\
       k->type = STK_VOID;\
     }\
@@ -727,7 +862,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   StackType* k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
     if (k->type == STK_NULL_CHECK_START) {\
       if (k->u.null_check.num == (id)) {\
         (isnull) = (k->u.null_check.pstr == (s));\
@@ -742,7 +877,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   StackType* k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
     if (k->type == STK_NULL_CHECK_START) {\
       if (k->u.null_check.num == (id)) {\
         if (level == 0) {\
@@ -762,7 +897,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   StackType* k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
     if (k->type == STK_NULL_CHECK_START) {\
       if (k->u.null_check.num == (id)) {\
         if (k->u.null_check.pstr != (s)) {\
@@ -802,7 +937,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   StackType* k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
     if (k->type == STK_NULL_CHECK_START) {\
       if (k->u.null_check.num == (id)) {\
         if (level == 0) {\
@@ -850,7 +985,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
     if (k->type == STK_REPEAT) {\
       if (level == 0) {\
         if (k->u.repeat.num == (id)) {\
@@ -868,7 +1003,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
   StackType* k = stk;\
   while (1) {\
     k--;\
-    STACK_BASE_CHECK(k); \
+    STACK_BASE_CHECK(k, "STACK_RETURN"); \
     if (k->type == STK_CALL_FRAME) {\
       if (level == 0) {\
         (addr) = k->u.call_frame.ret_addr;\
@@ -937,6 +1072,7 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
     is_fail = 0; \
 } while(0)
 
+
 #define ON_STR_BEGIN(s)  ((s) == str)
 #define ON_STR_END(s)    ((s) == end)
 #define IS_EMPTY_STR     (str == end)
@@ -988,6 +1124,77 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
 }
 #endif
 
+#ifdef USE_BACKREF_AT_LEVEL
+static int mem_is_in_memp(int mem, int num, UChar* memp)
+{ /* Returns 1 if mem equals one of the num values read from memp, else 0. */
+  int i;
+  MemNumType m;
+
+  for (i = 0; i < num; i++) {
+    GET_MEMNUM_INC(m, memp); /* presumably loads the next MemNumType and advances memp -- confirm in regint.h */
+    if (mem == (int )m) return 1;
+  }
+  return 0;
+}
+
+static int backref_match_at_nested_level(regex_t* reg
+        , StackType* top, StackType* stk_base
+        , int ignore_case, int ambig_flag
+        , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
+{ /* Returns 1 and advances *s when a capture found at level `nest` matches at *s; otherwise 0. */
+  UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
+  int level;
+  StackType* k;
+
+  level = 0;
+  k = top;
+  k--;
+  while (k >= stk_base) { /* scan from the entry just below top down to stk_base */
+    if (k->type == STK_CALL_FRAME) {
+      level--;
+    }
+    else if (k->type == STK_RETURN) {
+      level++;
+    }
+    else if (level == nest) { /* only entries at the requested level are examined */
+      if (k->type == STK_MEM_START) {
+       if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+         pstart = k->u.mem.pstr;
+         if (pend != NULL_UCHARP) { /* a MEM_END was already seen in this backward scan */
+           if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
+           p  = pstart;
+           ss = *s;
+
+           if (ignore_case != 0) {
+             if (string_cmp_ic(reg->enc, ambig_flag,
+                               pstart, &ss, (int )(pend - pstart)) == 0)
+               return 0; /* or goto next_mem; */
+           }
+           else {
+             while (p < pend) { /* bytewise comparison of the capture against the subject */
+               if (*p++ != *ss++) return 0; /* or goto next_mem; */
+             }
+           }
+
+           *s = ss; /* on success the caller's position moves past the matched text */
+           return 1;
+         }
+       }
+      }
+      else if (k->type == STK_MEM_END) {
+       if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
+         pend = k->u.mem.pstr; /* remember the end; the start is expected deeper in the stack */
+       }
+      }
+    }
+    k--;
+  }
+
+  return 0; /* no matching capture found at that level */
+}
+#endif /* USE_BACKREF_AT_LEVEL */
+
+
 #ifdef RUBY_PLATFORM
 
 typedef struct {
@@ -1003,7 +1210,7 @@ trap_ensure(VALUE arg)
   TrapEnsureArg* ta = (TrapEnsureArg* )arg;
 
   if (ta->state == 0) { /* trap_exec() is not normal return */
-    ONIG_STATE_DEC(ta->reg);
+    ONIG_STATE_DEC_THREAD(ta->reg);
     if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
       xfree(ta->stk_base);
 
@@ -1098,14 +1305,14 @@ static int MaxStackDepth = 0;
 /*
  * :nodoc:
  */
-static VALUE onig_stat_print()
+static VALUE onig_stat_print(void)
 {
   onig_print_statistics(stderr);
   return Qnil;
 }
 #endif
 
-extern void onig_statistics_init()
+extern void onig_statistics_init(void)
 {
   int i;
   for (i = 0; i < 256; i++) {
@@ -1165,27 +1372,43 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code)
 }
 
 static int
-code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc)
 {
-  unsigned int in_cc;
-  CClassNode* cc = (CClassNode* )node;
+  int found;
 
-  if (enclen == 1) {
-    in_cc = BITSET_AT(cc->bs, code);
+  if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+    if (IS_NULL(cc->mbuf)) {
+      found = 0;
+    }
+    else {
+      found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
+    }
   }
   else {
-    UChar* p = ((BBuf* )(cc->mbuf))->p;
-    in_cc = onig_is_in_code_range(p, code);
+    found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
   }
 
-  if (IS_CCLASS_NOT(cc)) {
-    return (in_cc ? 0 : 1);
+  if (IS_CCLASS_NOT(cc))
+    return !found;
+  else
+    return found;
+}
+
+extern int
+onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
+{
+  int len;
+
+  if (ONIGENC_MBC_MINLEN(enc) > 1) {
+    len = 2;
   }
   else {
-    return (in_cc ? 1 : 0);
+    len = ONIGENC_CODE_TO_MBCLEN(enc, code);
   }
+  return is_code_in_cc(len, code, cc);
 }
 
+
 /* matching region of POSIX API */
 typedef int regoff_t;
 
@@ -1217,6 +1440,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
   StackIndex si;
   StackIndex *repeat_stk;
   StackIndex *mem_start_stk, *mem_end_stk;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  int scv;
+  unsigned char* state_check_buff = msa->state_check_buff;
+  int num_comb_exp_check = reg->num_comb_exp_check;
+#endif
   n = reg->num_repeat + reg->num_mem * 2;
 
   STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
@@ -1270,8 +1498,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
     case OP_END:  STAT_OP_IN(OP_END);
       n = s - sstart;
       if (n > best_len) {
-       OnigRegion* region = msa->region;
+       OnigRegion* region;
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+       if (IS_FIND_LONGEST(option)) {
+         if (n > msa->best_len) {
+           msa->best_len = n;
+           msa->best_s   = (UChar* )sstart;
+         }
+         else
+           goto end_best_len;
+        }
+#endif
        best_len = n;
+       region = msa->region;
        if (region) {
 #ifdef USE_POSIX_REGION_OPTION
          if (IS_POSIX_REGION(msa->options)) {
@@ -1347,6 +1586,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 #endif
        } /* if (region) */
       } /* n > best_len */
+
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+    end_best_len:
+#endif
       STAT_OP_OUT;
 
       if (IS_FIND_CONDITION(option)) {
@@ -1384,24 +1627,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
         ss = s;
         sp = p;
 
-      exact1_ic_retry:
        len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
        DATA_ENSURE(0);
        q = lowbuf;
        while (len-- > 0) {
          if (*p != *q) {
-#if 1
-            if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-              ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
-              s = ss;
-              p = sp;
-              goto exact1_ic_retry;
-            }
-            else
-              goto fail;
-#else
             goto fail;
-#endif
           }
          p++; q++;
        }
@@ -1490,24 +1721,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
           ss = s;
           sp = p;
 
-        exactn_ic_retry:
          len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
          DATA_ENSURE(0);
          q = lowbuf;
          while (len-- > 0) {
            if (*p != *q) {
-#if 1
-              if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-                ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
-                s = ss;
-                p = sp;
-                goto exactn_ic_retry;
-              }
-              else
-                goto fail;
-#else
               goto fail;
-#endif
             }
            p++; q++;
          }
@@ -1739,8 +1958,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        mb_len = enc_len(encode, s);
        ss = s;
        s += mb_len;
+       DATA_ENSURE(0);
        code = ONIGENC_MBC_TO_CODE(encode, ss, s);
-        if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
+       if (is_code_in_cc(mb_len, code, node) == 0) goto fail;
       }
       STAT_OP_OUT;
       break;
@@ -1826,6 +2046,47 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       STAT_OP_OUT;
       break;
 
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+    case OP_STATE_CHECK_ANYCHAR_STAR:  STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
+      GET_STATE_CHECK_NUM_INC(mem, p);
+      while (s < end) {
+       STATE_CHECK_VAL(scv, mem);
+       if (scv) goto fail;
+
+       STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+       n = enc_len(encode, s);
+        DATA_ENSURE(n);
+        if (ONIGENC_IS_MBC_NEWLINE(encode, s, end))  goto fail;
+        sprev = s;
+        s += n;
+      }
+      STAT_OP_OUT;
+      break;
+
+    case OP_STATE_CHECK_ANYCHAR_ML_STAR:
+      STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
+
+      GET_STATE_CHECK_NUM_INC(mem, p);
+      while (s < end) {
+       STATE_CHECK_VAL(scv, mem);
+       if (scv) goto fail;
+
+       STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
+       n = enc_len(encode, s);
+       if (n > 1) {
+         DATA_ENSURE(n);
+         sprev = s;
+         s += n;
+       }
+       else {
+         sprev = s;
+         s++;
+       }
+      }
+      STAT_OP_OUT;
+      break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
     case OP_WORD:  STAT_OP_IN(OP_WORD);
       DATA_ENSURE(1);
       if (! ONIGENC_IS_MBC_WORD(encode, s, end))
@@ -1946,6 +2207,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        STAT_OP_OUT;
        continue;
       }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+      else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+       STAT_OP_OUT;
+       continue;
+      }
+#endif
       goto fail;
       break;
 
@@ -1966,6 +2233,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        STAT_OP_OUT;
        continue;
       }
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+      else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
+        UChar* ss = s + enc_len(encode, s);
+        if (ON_STR_END(ss + enc_len(encode, ss))) {
+          STAT_OP_OUT;
+          continue;
+        }
+      }
+#endif
       goto fail;
       break;
 
@@ -2041,11 +2317,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       goto backref;
       break;
 
-    case OP_BACKREF3:  STAT_OP_IN(OP_BACKREF3);
-      mem = 3;
-      goto backref;
-      break;
-
     case OP_BACKREFN:  STAT_OP_IN(OP_BACKREFN);
       GET_MEMNUM_INC(mem, p);
     backref:
@@ -2188,6 +2459,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
        continue;
       }
       break;
+
+#ifdef USE_BACKREF_AT_LEVEL
+    case OP_BACKREF_AT_LEVEL:
+      {
+       int len;
+       OnigOptionType ic;
+       LengthType level;
+
+       GET_OPTION_INC(ic,    p);
+       GET_LENGTH_INC(level, p);
+       GET_LENGTH_INC(tlen,  p);
+
+       sprev = s;
+       if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
+                                 , (int )level, (int )tlen, p, &s, end)) {
+         while (sprev + (len = enc_len(encode, sprev)) < s)
+           sprev += len;
+
+         p += (SIZE_MEMNUM * tlen);
+       }
+       else
+         goto fail;
+
+       STAT_OP_OUT;
+       continue;
+      }
+      
+      break;
+#endif
     
     case OP_SET_OPTION_PUSH:  STAT_OP_IN(OP_SET_OPTION_PUSH);
       GET_OPTION_INC(option, p);
@@ -2309,6 +2609,43 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       continue;
       break;
 
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+    case OP_STATE_CHECK_PUSH:  STAT_OP_IN(OP_STATE_CHECK_PUSH);
+      GET_STATE_CHECK_NUM_INC(mem, p);
+      STATE_CHECK_VAL(scv, mem);
+      if (scv) goto fail;
+
+      GET_RELADDR_INC(addr, p);
+      STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+      STAT_OP_OUT;
+      continue;
+      break;
+
+    case OP_STATE_CHECK_PUSH_OR_JUMP:  STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
+      GET_STATE_CHECK_NUM_INC(mem, p);
+      GET_RELADDR_INC(addr, p);
+      STATE_CHECK_VAL(scv, mem);
+      if (scv) {
+       p += addr;
+      }
+      else {
+       STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem);
+      }
+      STAT_OP_OUT;
+      continue;
+      break;
+
+    case OP_STATE_CHECK:  STAT_OP_IN(OP_STATE_CHECK);
+      GET_STATE_CHECK_NUM_INC(mem, p);
+      STATE_CHECK_VAL(scv, mem);
+      if (scv) goto fail;
+
+      STACK_PUSH_STATE_CHECK(s, mem);
+      STAT_OP_OUT;
+      continue;
+      break;
+#endif /* USE_COMBINATION_EXPLOSION_CHECK */
+
     case OP_POP:  STAT_OP_IN(OP_POP);
       STACK_POP_ONE;
       STAT_OP_OUT;
@@ -2383,7 +2720,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 
     repeat_inc:
       stkp->u.repeat.count++;
-      if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
+      if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
         /* end of repeat. Nothing to do. */
       }
       else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
@@ -2413,8 +2750,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
 
     repeat_inc_ng:
       stkp->u.repeat.count++;
-      if (stkp->u.repeat.count < reg->repeat_range[mem].upper ||
-         IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) {
+      if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
         if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
           UChar* pcode = stkp->u.repeat.pcode;
 
@@ -2543,6 +2879,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
       p     = stk->u.state.pcode;
       s     = stk->u.state.pstr;
       sprev = stk->u.state.pstr_prev;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+      if (stk->u.state.state_check != 0) {
+        stk->type = STK_STATE_CHECK_MARK;
+        stk++;
+      }
+#endif
+
       STAT_OP_OUT;
       continue;
       break;
@@ -2618,20 +2962,12 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
   tsave = t;
   psave = p;
 
- retry:
   while (t < tend) {
     lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
     q = lowbuf;
     while (lowlen > 0) {
       if (*t++ != *q++) {
-        if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-          ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
-          t = tsave;
-          p = psave;
-          goto retry;
-        }
-        else
-          return 0;
+       return 0;
       }
       lowlen--;
     }
@@ -2727,66 +3063,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
                 const UChar* text, const UChar* text_end,
                 const UChar* text_range)
 {
-  const UChar *s, *t, *p, *end;
+  const UChar *s, *se, *t, *p, *end;
   const UChar *tail;
-  int skip;
+  int skip, tlen1;
 
 #ifdef ONIG_DEBUG_SEARCH
   fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
          (int )text, (int )text_end, (int )text_range);
 #endif
 
-  end = text_range + (target_end - target) - 1;
-  if (end > text_end)
-    end = text_end;
-
   tail = target_end - 1;
+  tlen1 = tail - target;
+  end = text_range;
+  if (end + tlen1 > text_end)
+    end = text_end - tlen1;
+
   s = text;
-  while ((s - text) < target_end - target) {
-    s += enc_len(reg->enc, s);
-  }
-  s--; /* set to text check tail position. */
 
   if (IS_NULL(reg->int_map)) {
     while (s < end) {
-      p = s;
+      p = se = s + tlen1;
       t = tail;
       while (t >= target && *p == *t) {
-       p--; t--;
+        p--; t--;
       }
-      if (t < target) return (UChar* )(p + 1);
+      if (t < target) return (UChar* )s;
 
-      skip = reg->map[*s];
-      p = s + 1;
-      if (p >= text_end) return (UChar* )NULL;
-      t = p;
+      skip = reg->map[*se];
+      t = s;
       do {
-       p += enc_len(reg->enc, p);
-      } while ((p - t) < skip && p < text_end);
-
-      s += (p - t);
+        s += enc_len(reg->enc, s);
+      } while ((s - t) < skip && s < end);
     }
   }
   else {
     while (s < end) {
-      p = s;
+      p = se = s + tlen1;
       t = tail;
       while (t >= target && *p == *t) {
-       p--; t--;
+        p--; t--;
       }
-      if (t < target) return (UChar* )(p + 1);
+      if (t < target) return (UChar* )s;
 
-      skip = reg->int_map[*s];
-      p = s + 1;
-      if (p >= text_end) return (UChar* )NULL;
-      t = p;
+      skip = reg->int_map[*se];
+      t = s;
       do {
-       p += enc_len(reg->enc, p);
-      } while ((p - t) < skip && p < text_end);
-
-      s += (p - t);
+        s += enc_len(reg->enc, s);
+      } while ((s - t) < skip && s < end);
     }
   }
+
   return (UChar* )NULL;
 }
 
@@ -2915,7 +3241,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
   UChar *prev;
   MatchArg msa;
 
-#ifdef USE_MULTI_THREAD_SYSTEM
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ start:
+  THREAD_ATOMIC_START;
   if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
     ONIG_STATE_INC(reg);
     if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
@@ -2924,17 +3252,27 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
     }
   }
   else {
-    int n = 0;
+    int n;
+
+    THREAD_ATOMIC_END;
+    n = 0;
     while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
       if (++n > THREAD_PASS_LIMIT_COUNT)
        return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
       THREAD_PASS;
     }
-    ONIG_STATE_INC(reg);
+    goto start;
   }
-#endif /* USE_MULTI_THREAD_SYSTEM */
+  THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
 
   MATCH_ARG_INIT(msa, option, region, at);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  {
+    int offset = at - str;
+    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+  }
+#endif
 
   if (region
 #ifdef USE_POSIX_REGION_OPTION
@@ -2952,7 +3290,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
   }
 
   MATCH_ARG_FREE(msa);
-  ONIG_STATE_DEC(reg);
+  ONIG_STATE_DEC_THREAD(reg);
   return r;
 }
 
@@ -3029,7 +3367,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
          if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
            goto retry_gate;
        }
-       else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end))
+       else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                )
          goto retry_gate;
        break;
       }
@@ -3132,7 +3474,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
       switch (reg->sub_anchor) {
       case ANCHOR_BEGIN_LINE:
        if (!ON_STR_BEGIN(p)) {
-         prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
+         prev = onigenc_get_prev_char_head(reg->enc, str, p);
          if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) {
            p = prev;
            goto retry;
@@ -3149,7 +3491,11 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
            goto retry;
          }
        }
-       else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) {
+       else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+              && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end)
+#endif
+                ) {
          p = onigenc_get_prev_char_head(reg->enc, adjrange, p);
          if (IS_NULL(p)) goto fail;
          goto retry;
@@ -3187,8 +3533,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
   int r;
   UChar *s, *prev;
   MatchArg msa;
+  const UChar *orig_start = start;
 
-#ifdef USE_MULTI_THREAD_SYSTEM
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+ start:
+  THREAD_ATOMIC_START;
   if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
     ONIG_STATE_INC(reg);
     if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
@@ -3197,15 +3546,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
     }
   }
   else {
-    int n = 0;
+    int n;
+
+    THREAD_ATOMIC_END;
+    n = 0;
     while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
       if (++n > THREAD_PASS_LIMIT_COUNT)
        return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
       THREAD_PASS;
     }
-    ONIG_STATE_INC(reg);
+    goto start;
   }
-#endif /* USE_MULTI_THREAD_SYSTEM */
+  THREAD_ATOMIC_END;
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
 
 #ifdef ONIG_DEBUG_SEARCH
   fprintf(stderr,
@@ -3224,16 +3577,31 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
 
   if (start > end || start < str) goto mismatch_no_msa;
 
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
 #define MATCH_AND_RETURN_CHECK \
   r = match_at(reg, str, end, s, prev, &msa);\
   if (r != ONIG_MISMATCH) {\
-    if (r >= 0) goto match;\
-    goto finish; /* error */ \
+    if (r >= 0) {\
+      if (! IS_FIND_LONGEST(reg->options)) {\
+        goto match;\
+      }\
+    }\
+    else goto finish; /* error */ \
   }
+#else
+#define MATCH_AND_RETURN_CHECK \
+  r = match_at(reg, str, end, s, prev, &msa);\
+  if (r != ONIG_MISMATCH) {\
+    if (r >= 0) {\
+      goto match;\
+    }\
+    else goto finish; /* error */ \
+  }
+#endif
 
   /* anchor optimize: resume search range */
   if (reg->anchor != 0 && str < end) {
-    UChar* semi_end;
+    UChar *min_semi_end, *max_semi_end;
 
     if (reg->anchor & ANCHOR_BEGIN_POSITION) {
       /* search start-position only */
@@ -3259,58 +3627,67 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
       }
     }
     else if (reg->anchor & ANCHOR_END_BUF) {
-      semi_end = (UChar* )end;
+      min_semi_end = max_semi_end = (UChar* )end;
 
     end_buf:
-      if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
+      if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
        goto mismatch_no_msa;
 
       if (range > start) {
-       if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) {
-         start = semi_end - reg->anchor_dmax;
+       if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
+         start = min_semi_end - reg->anchor_dmax;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
          else { /* match with empty at end */
            start = onigenc_get_prev_char_head(reg->enc, str, end);
          }
        }
-       if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) {
-         range = semi_end - reg->anchor_dmin + 1;
+       if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+         range = max_semi_end - reg->anchor_dmin + 1;
        }
 
        if (start >= range) goto mismatch_no_msa;
       }
       else {
-       if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) {
-         range = semi_end - reg->anchor_dmax;
+       if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
+         range = min_semi_end - reg->anchor_dmax;
        }
-       if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) {
-         start = semi_end - reg->anchor_dmin;
+       if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
+         start = max_semi_end - reg->anchor_dmin;
          start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
-         if (range > start) goto mismatch_no_msa;
        }
+       if (range > start) goto mismatch_no_msa;
       }
     }
     else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
       UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
 
+      max_semi_end = (UChar* )end;
       if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
-       semi_end = pre_end;
-       if (semi_end > str && start <= semi_end) {
+       min_semi_end = pre_end;
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+       pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1);
+       if (IS_NOT_NULL(pre_end) &&
+           ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
+         min_semi_end = pre_end;
+       }
+#endif
+       if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
       }
       else {
-       semi_end = (UChar* )end;
+       min_semi_end = (UChar* )end;
        goto end_buf;
       }
     }
-    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) {
+    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
       goto begin_position;
     }
   }
   else if (str == end) { /* empty string */
-    static const UChar* address_for_empty_string = "";
+    static const UChar* address_for_empty_string = (UChar* )"";
 
 #ifdef ONIG_DEBUG_SEARCH
     fprintf(stderr, "onig_search: empty string.\n");
@@ -3322,6 +3699,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
       prev = (UChar* )NULL;
 
       MATCH_ARG_INIT(msa, option, region, start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+      msa.state_check_buff      = (void* )0;
+      msa.state_check_buff_size = 0;
+#endif
       MATCH_AND_RETURN_CHECK;
       goto mismatch;
     }
@@ -3333,7 +3714,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
          (int )(end - str), (int )(start - str), (int )(range - str));
 #endif
 
-  MATCH_ARG_INIT(msa, option, region, start);
+  MATCH_ARG_INIT(msa, option, region, orig_start);
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  {
+    int offset = (MIN(start, range) - str);
+    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
+  }
+#endif
 
   s = (UChar* )start;
   if (range > start) {   /* forward search */
@@ -3398,7 +3785,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
       MATCH_AND_RETURN_CHECK;
       prev = s;
       s += enc_len(reg->enc, s);
-    } while (s <= range);   /* exec s == range, because empty match with /$/. */
+    } while (s < range);
+
+    if (s == range) { /* because empty match with /$/. */
+      MATCH_AND_RETURN_CHECK;
+    }
   }
   else {  /* backward search */
     if (reg->optimize != ONIG_OPTIMIZE_NONE) {
@@ -3457,11 +3848,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
   }
 
  mismatch:
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+  if (IS_FIND_LONGEST(reg->options)) {
+    if (msa.best_len >= 0) {
+      s = msa.best_s;
+      goto match;
+    }
+  }
+#endif
   r = ONIG_MISMATCH;
 
  finish:
   MATCH_ARG_FREE(msa);
-  ONIG_STATE_DEC(reg);
+  ONIG_STATE_DEC_THREAD(reg);
 
   /* If result is mismatch and no FIND_NOT_EMPTY option,
      then the region is not setted in match_at(). */
@@ -3482,7 +3881,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
  mismatch_no_msa:
   r = ONIG_MISMATCH;
  finish_no_msa:
-  ONIG_STATE_DEC(reg);
+  ONIG_STATE_DEC_THREAD(reg);
 #ifdef ONIG_DEBUG
   if (r != ONIG_MISMATCH)
     fprintf(stderr, "onig_search: error %d\n", r);
@@ -3490,7 +3889,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
   return r;
 
  match:
-  ONIG_STATE_DEC(reg);
+  ONIG_STATE_DEC_THREAD(reg);
   MATCH_ARG_FREE(msa);
   return s - str;
 }
index 6839708be7a374fd4b8616ba5d81ac98dfe257cf..f5ad1f35a29e641a07a5c0c5729039f2f8ef364e 100755 (executable)
@@ -2,7 +2,7 @@
   regext.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -194,6 +194,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
   return r;
 }
 
+#ifdef USE_RECOMPILE_API
 extern int
 onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
                       OnigCompileInfo* ci, OnigErrorInfo* einfo)
@@ -211,3 +212,4 @@ onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_e
   }
   return 0;
 }
+#endif
index 70e8582ff2a855ed7e13976e761a39ac57ed9bda..248957c9d9a2ecb5000158162e12fb353b2bad33 100644 (file)
@@ -2,7 +2,7 @@
   reggnu.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -93,6 +93,7 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
   return r;
 }
 
+#ifdef USE_RECOMPILE_API
 extern int
 re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
 {
@@ -113,6 +114,7 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf)
   }
   return r;
 }
+#endif
 
 extern void
 re_free_pattern(regex_t* reg)
@@ -151,16 +153,16 @@ re_mbcinit(int mb_code)
   OnigEncoding enc;
 
   switch (mb_code) {
-  case MBCTYPE_ASCII:
+  case RE_MBCTYPE_ASCII:
     enc = ONIG_ENCODING_ASCII;
     break;
-  case MBCTYPE_EUC:
+  case RE_MBCTYPE_EUC:
     enc = ONIG_ENCODING_EUC_JP;
     break;
-  case MBCTYPE_SJIS:
+  case RE_MBCTYPE_SJIS:
     enc = ONIG_ENCODING_SJIS;
     break;
-  case MBCTYPE_UTF8:
+  case RE_MBCTYPE_UTF8:
     enc = ONIG_ENCODING_UTF8;
     break;
   default:
index 11f3c43af94b66a6727924fd15c954b9dc11b380..d6819d8f949d4d01dd81f7ff512ec853b7779ac4 100644 (file)
@@ -4,7 +4,7 @@
   regint.h -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
 #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE     /* /\n$/ =~ "\n" */
 #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+/* #define USE_RECOMPILE_API */
+/* treat \r\n as line terminator.
+   !!! NO SUPPORT !!!
+   use this configuration on your own responsibility */
+/* #define USE_CRNL_AS_LINE_TERMINATOR */
+
 /* internal config */
 #define USE_RECYCLE_NODE
 #define USE_OP_PUSH_OR_JUMP_EXACT
-#define USE_QUALIFIER_PEEK_NEXT
+#define USE_QUANTIFIER_PEEK_NEXT
 #define USE_ST_HASH_TABLE
 #define USE_SHARED_CCLASS_TABLE
 
 /* interface to external system */
 #ifdef NOT_RUBY      /* given from Makefile */
 #include "config.h"
+#define USE_BACKREF_AT_LEVEL
 #define USE_CAPTURE_HISTORY
 #define USE_VARIABLE_META_CHARS
 #define USE_WORD_BEGIN_END          /* "\<": word-begin, "\>": word-end */
 #define USE_POSIX_REGION_OPTION     /* needed for POSIX API support */
+#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+/* #define USE_COMBINATION_EXPLOSION_CHECK */     /* (X*)* */
 /* #define USE_MULTI_THREAD_SYSTEM */
+#define THREAD_SYSTEM_INIT          /* depend on thread system */
+#define THREAD_SYSTEM_END           /* depend on thread system */
 #define THREAD_ATOMIC_START         /* depend on thread system */
 #define THREAD_ATOMIC_END           /* depend on thread system */
 #define THREAD_PASS                 /* depend on thread system */
-#define CHECK_INTERRUPT             /* depend on application */
 #define xmalloc     malloc
 #define xrealloc    realloc
 #define xcalloc     calloc
 #define xfree       free
 #else
 #include "ruby.h"
-#include "version.h"
 #include "rubysig.h"      /* for DEFER_INTS, ENABLE_INTS */
 
+#define USE_COMBINATION_EXPLOSION_CHECK        /* (X*)* */
 #define USE_MULTI_THREAD_SYSTEM
+#define THREAD_SYSTEM_INIT
+#define THREAD_SYSTEM_END
 #define THREAD_ATOMIC_START          DEFER_INTS
 #define THREAD_ATOMIC_END            ENABLE_INTS
 #define THREAD_PASS                  rb_thread_schedule()
-#define CHECK_INTERRUPT do {\
-  if (rb_trap_pending) {\
-    if (! rb_prohibit_interrupt) {\
-      rb_trap_exec();\
-    }\
-  }\
-} while (0)
 
-#define DEFAULT_WARN_FUNCTION        rb_warn
-#define DEFAULT_VERB_WARN_FUNCTION   rb_warning
+#define DEFAULT_WARN_FUNCTION        onig_rb_warn
+#define DEFAULT_VERB_WARN_FUNCTION   onig_rb_warning
 
 #endif /* else NOT_RUBY */
 
+#define STATE_CHECK_STRING_THRESHOLD_LEN             7
+#define STATE_CHECK_BUFF_MAX_SIZE               0x4000
+
 #define THREAD_PASS_LIMIT_COUNT     8
 #define xmemset     memset
 #define xmemcpy     memcpy
 #define xmemmove    memmove
 #if defined(_WIN32) && !defined(__GNUC__)
 #define xalloca     _alloca
-#ifdef NOT_RUBY
 #if _MSC_VER < 1500
-# define vsnprintf   _vsnprintf
+#ifndef vsnprintf
+#define vsnprintf   _vsnprintf
 #endif
 #endif
 #else
 #define xalloca     alloca
 #endif
 
+#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
+#define ONIG_STATE_INC(reg) (reg)->state++
+#define ONIG_STATE_DEC(reg) (reg)->state--
 
-#ifdef USE_MULTI_THREAD_SYSTEM
-#define ONIG_STATE_INC(reg)    (reg)->state++
-#define ONIG_STATE_DEC(reg)    (reg)->state--
+#define ONIG_STATE_INC_THREAD(reg) do {\
+  THREAD_ATOMIC_START;\
+  (reg)->state++;\
+  THREAD_ATOMIC_END;\
+} while(0)
+#define ONIG_STATE_DEC_THREAD(reg) do {\
+  THREAD_ATOMIC_START;\
+  (reg)->state--;\
+  THREAD_ATOMIC_END;\
+} while(0)
 #else
-#define ONIG_STATE_INC(reg)    /* Nothing */
-#define ONIG_STATE_DEC(reg)    /* Nothing */
-#endif /* USE_MULTI_THREAD_SYSTEM */
+#define ONIG_STATE_INC(reg)         /* Nothing */
+#define ONIG_STATE_DEC(reg)         /* Nothing */
+#define ONIG_STATE_INC_THREAD(reg)  /* Nothing */
+#define ONIG_STATE_DEC_THREAD(reg)  /* Nothing */
+#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
 
 
 #define onig_st_is_member              st_is_member
 #endif
 #endif
 
+#ifdef __BORLANDC__
+#include <malloc.h>
+#endif
+
 #ifdef ONIG_DEBUG
 # include <stdio.h>
 #endif
 #define NULL_UCHARP                   ((UChar* )0)
 
 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
-#define WORD_ALIGNMENT_SIZE       SIZEOF_LONG
+/* sizeof(OnigCodePoint) */
+#define WORD_ALIGNMENT_SIZE     SIZEOF_LONG
 
 #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
   (pad_size) = WORD_ALIGNMENT_SIZE \
@@ -520,7 +545,7 @@ typedef struct _BBuf {
 #define ANCHOR_LOOK_BEHIND_NOT  (1<<13)
 
 #define ANCHOR_ANYCHAR_STAR     (1<<14)   /* ".*" optimize info */
-#define ANCHOR_ANYCHAR_STAR_PL  (1<<15)   /* ".*" optimize info (posix-line) */
+#define ANCHOR_ANYCHAR_STAR_ML  (1<<15)   /* ".*" optimize info (multi-line) */
 
 /* operation code */
 enum OpCode {
@@ -560,8 +585,6 @@ enum OpCode {
 
   OP_WORD,
   OP_NOT_WORD,
-  OP_WORD_SB,
-  OP_WORD_MB,
   OP_WORD_BOUND,
   OP_NOT_WORD_BOUND,
   OP_WORD_BEGIN,
@@ -576,11 +599,11 @@ enum OpCode {
 
   OP_BACKREF1,
   OP_BACKREF2,
-  OP_BACKREF3,
   OP_BACKREFN,
   OP_BACKREFN_IC,
   OP_BACKREF_MULTI,
   OP_BACKREF_MULTI_IC,
+  OP_BACKREF_AT_LEVEL,    /* \k<xxx+n>, \k<xxx-n> */
 
   OP_MEMORY_START,
   OP_MEMORY_START_PUSH,   /* push back-tracker to stack */
@@ -620,34 +643,33 @@ enum OpCode {
   OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end   */
 
   OP_CALL,                 /* \g<name> */
-  OP_RETURN
-};
+  OP_RETURN,
 
-/* arguments type */
-#define ARG_SPECIAL     -1
-#define ARG_NON          0
-#define ARG_RELADDR      1
-#define ARG_ABSADDR      2
-#define ARG_LENGTH       3
-#define ARG_MEMNUM       4
-#define ARG_OPTION       5
+  OP_STATE_CHECK_PUSH,         /* combination explosion check and push */
+  OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump  */
+  OP_STATE_CHECK,              /* check only */
+  OP_STATE_CHECK_ANYCHAR_STAR,
+  OP_STATE_CHECK_ANYCHAR_ML_STAR
+};
 
 typedef int RelAddrType;
 typedef int AbsAddrType;
 typedef int LengthType;
 typedef int RepeatNumType;
 typedef short int MemNumType;
+typedef short int StateCheckNumType;
 typedef void* PointerType;
 
-#define SIZE_OPCODE        1
-#define SIZE_RELADDR       sizeof(RelAddrType)
-#define SIZE_ABSADDR       sizeof(AbsAddrType)
-#define SIZE_LENGTH        sizeof(LengthType)
-#define SIZE_MEMNUM        sizeof(MemNumType)
-#define SIZE_REPEATNUM     sizeof(RepeatNumType)
-#define SIZE_OPTION        sizeof(OnigOptionType)
-#define SIZE_CODE_POINT    sizeof(OnigCodePoint)
-#define SIZE_POINTER       sizeof(PointerType)
+#define SIZE_OPCODE           1
+#define SIZE_RELADDR          sizeof(RelAddrType)
+#define SIZE_ABSADDR          sizeof(AbsAddrType)
+#define SIZE_LENGTH           sizeof(LengthType)
+#define SIZE_MEMNUM           sizeof(MemNumType)
+#define SIZE_STATE_CHECK_NUM  sizeof(StateCheckNumType)
+#define SIZE_REPEATNUM        sizeof(RepeatNumType)
+#define SIZE_OPTION           sizeof(OnigOptionType)
+#define SIZE_CODE_POINT       sizeof(OnigCodePoint)
+#define SIZE_POINTER          sizeof(PointerType)
 
 
 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@@ -673,6 +695,7 @@ typedef void* PointerType;
 #define GET_REPEATNUM_INC(num,p)   PLATFORM_GET_INC(num,    p, RepeatNumType)
 #define GET_OPTION_INC(option,p)   PLATFORM_GET_INC(option, p, OnigOptionType)
 #define GET_POINTER_INC(ptr,p)     PLATFORM_GET_INC(ptr,    p, PointerType)
+#define GET_STATE_CHECK_NUM_INC(num,p)  PLATFORM_GET_INC(num, p, StateCheckNumType)
 
 /* code point's address must be aligned address. */
 #define GET_CODE_POINT(code,p)   code = *((OnigCodePoint* )(p))
@@ -715,6 +738,12 @@ typedef void* PointerType;
 #define SIZE_OP_CALL                   (SIZE_OPCODE + SIZE_ABSADDR)
 #define SIZE_OP_RETURN                  SIZE_OPCODE
 
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+#define SIZE_OP_STATE_CHECK            (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#define SIZE_OP_STATE_CHECK_PUSH       (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#endif
 
 #define MC_ESC(enc)               (enc)->meta_char_table.esc
 #define MC_ANYCHAR(enc)           (enc)->meta_char_table.anychar
@@ -723,6 +752,11 @@ typedef void* PointerType;
 #define MC_ONE_OR_MORE_TIME(enc)  (enc)->meta_char_table.one_or_more_time
 #define MC_ANYCHAR_ANYTIME(enc)   (enc)->meta_char_table.anychar_anytime
 
+#define IS_MC_ESC_CODE(code, enc, syn) \
+  ((code) == MC_ESC(enc) && \
+   !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE))
+
+
 #define SYN_POSIX_COMMON_OP \
  ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \
    ONIG_SYN_OP_DECIMAL_BACKREF | \
@@ -783,13 +817,14 @@ extern void onig_print_statistics P_((FILE* f));
 #endif
 #endif
 
-extern char* onig_error_code_to_format P_((int code));
-extern void  onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
+extern UChar* onig_error_code_to_format P_((int code));
+extern void  onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
 extern int  onig_bbuf_init P_((BBuf* buf, int size));
 extern int  onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
 extern int  onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
 extern void onig_chain_reduce P_((regex_t* reg));
 extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
 extern void onig_transfer P_((regex_t* to, regex_t* from));
+extern int  onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
 
 #endif /* REGINT_H */
index 58e122f4869cac460f460022ac4c2bb03655bd44..af5c8593e6ea08be6c22e35991da7606723d144a 100644 (file)
@@ -2,7 +2,7 @@
   regparse.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -60,6 +60,20 @@ OnigSyntaxType*  OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
 
 extern void onig_null_warn(const char* s) { }
 
+#ifdef RUBY_PLATFORM
+extern void  /* warning hook compiled only when RUBY_PLATFORM is defined */
+onig_rb_warn(const char* s)  /* s: the message text to report */
+{
+  rb_warn("%s", s);  /* s is passed as an argument, never as the format string itself */
+}
+
+extern void  /* same shape as onig_rb_warn(), but reports through rb_warning() */
+onig_rb_warning(const char* s)  /* s: the message text to report */
+{
+  rb_warning("%s", s);  /* s is passed as an argument, never as the format string itself */
+}
+#endif
+
 #ifdef DEFAULT_WARN_FUNCTION
 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
 #else
@@ -305,6 +319,88 @@ typedef struct {
 
 #include "st.h"
 
+typedef struct {       /* hash-table key that describes a run of bytes by its two bounds */
+  unsigned char* s;    /* first byte of the key */
+  unsigned char* end;  /* one past the last byte: strend_cmp/strend_hash loop while p < end */
+} st_strend_key;
+
+static int strend_cmp(st_strend_key*, st_strend_key*);  /* defined further down */
+static int strend_hash(st_strend_key*);                 /* defined further down */
+
+static struct st_hash_type type_strend_hash = {  /* callback pair given to onig_st_init_table_with_size() */
+  strend_cmp,   /* key comparison */
+  strend_hash,  /* key hashing */
+};
+
+static st_table*  /* creates a table whose keys are handled by the strend_cmp/strend_hash callbacks */
+onig_st_init_strend_table_with_size(int size)  /* size is passed through unchanged */
+{
+    return onig_st_init_table_with_size(&type_strend_hash, size);
+}
+
+static int  /* returns whatever onig_st_lookup() returns; *value is filled in by that call */
+onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value)
+{
+    st_strend_key key;  /* temporary key on the stack: nothing is allocated for a lookup */
+
+    key.s   = (unsigned char* )str_key;  /* bounds are stored as given, constness cast away */
+    key.end = (unsigned char* )end_key;
+
+    return onig_st_lookup(table, (st_data_t )(&key), value);  /* the table sees the address of the local key */
+}
+
+static int  /* returns onig_st_insert()'s result, or ONIGERR_MEMORY when the key cannot be allocated */
+onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value)
+{
+  st_strend_key* key;
+  int result;
+
+  key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
+  CHECK_NULL_RETURN_VAL(key, ONIGERR_MEMORY);  /* never write through a failed allocation */
+  key->s   = (unsigned char* )str_key;  /* only the two bounds are stored; the bytes are not copied */
+  key->end = (unsigned char* )end_key;
+  result = onig_st_insert(table, (st_data_t )key, value);
+  /* NOTE(review): non-zero presumably means the table kept an already-present key -- confirm in st.c */
+  if (result) xfree(key);
+  return result;
+}
+
+static int  /* comparison callback: 0 when both keys span identical bytes, non-zero otherwise */
+strend_cmp(st_strend_key* x, st_strend_key* y)
+{
+  unsigned char *p, *q;
+  int c;
+
+  if ((x->end - x->s) != (y->end - y->s))
+    return 1;  /* different lengths can never be equal; the value 1 carries no ordering meaning */
+
+  p = x->s;
+  q = y->s;
+  while (p < x->end) {  /* lengths are equal here, so bounding by x->end also bounds q */
+    c = (int )*p - (int )*q;
+    if (c != 0) return c;  /* first differing byte decides */
+
+    p++; q++;
+  }
+
+  return 0;
+}
+
+static int  /* hash callback: folds every byte of s..end into a single value */
+strend_hash(st_strend_key* x)
+{
+  unsigned int val;  /* unsigned: wraps on overflow instead of invoking undefined behaviour */
+  unsigned char *p;
+
+  val = 0;
+  p = x->s;
+  while (p < x->end) {
+    val = val * 997 + (unsigned int )*p++;  /* multiplicative mix, one byte per step */
+  }
+
+  return (int )(val + (val >> 5));  /* logical shift on unsigned; cast back to the callback's int type */
+}
+
 typedef st_table  NameTable;
 typedef st_data_t HashDataType;   /* 1.6 st.h doesn't define st_data_t type */
 
@@ -350,8 +446,10 @@ onig_print_names(FILE* fp, regex_t* reg)
 static int
 i_free_name_entry(UChar* key, NameEntry* e, void* arg)
 {
-  xfree(e->name);  /* == key */
+  xfree(e->name);
   if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
+  xfree(key);
+  xfree(e);
   return ST_DELETE;
 }
 
@@ -801,6 +899,23 @@ onig_number_of_names(regex_t* reg)
 }
 #endif /* else USE_NAMED_GROUP */
 
+extern int  /* returns 1 or 0 depending on reg->options and reg->syntax, as tested below */
+onig_noname_group_capture_is_active(regex_t* reg)
+{
+  if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
+    return 0;  /* the DONT_CAPTURE_GROUP option wins unconditionally */
+
+#ifdef USE_NAMED_GROUP
+  if (onig_number_of_names(reg) > 0 &&
+      IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+      !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+    return 0;  /* names exist, the syntax is named-only, and CAPTURE_GROUP was not set */
+  }
+#endif
+
+  return 1;
+}
+
 
 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE   16
 
@@ -825,6 +940,13 @@ scan_env_clear(ScanEnv* env)
 
   for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
     env->mem_nodes_static[i] = NULL_NODE;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  env->num_comb_exp_check  = 0;
+  env->comb_exp_max_regnum = 0;
+  env->curr_max_regnum     = 0;
+  env->has_recursion       = 0;
+#endif
 }
 
 static int
@@ -929,9 +1051,9 @@ onig_node_free(Node* node)
     }
     break;
 
-  case N_QUALIFIER:
-    if (NQUALIFIER(node).target)
-      onig_node_free(NQUALIFIER(node).target);
+  case N_QUANTIFIER:
+    if (NQUANTIFIER(node).target)
+      onig_node_free(NQUANTIFIER(node).target);
     break;
 
   case N_EFFECT:
@@ -966,34 +1088,35 @@ onig_node_free(Node* node)
 
 #ifdef USE_RECYCLE_NODE
 extern int
-onig_free_node_list()
+onig_free_node_list(void)
 {
   FreeNode* n;
 
-  THREAD_ATOMIC_START;
-  while (FreeNodeList) {
+  /* THREAD_ATOMIC_START; */
+  while (IS_NOT_NULL(FreeNodeList)) {
     n = FreeNodeList;
     FreeNodeList = FreeNodeList->next;
     xfree(n);
   }
-  THREAD_ATOMIC_END;
+  /* THREAD_ATOMIC_END; */
   return 0;
 }
 #endif
 
 static Node*
-node_new()
+node_new(void)
 {
   Node* node;
 
 #ifdef USE_RECYCLE_NODE
+  THREAD_ATOMIC_START;
   if (IS_NOT_NULL(FreeNodeList)) {
-    THREAD_ATOMIC_START;
     node = (Node* )FreeNodeList;
     FreeNodeList = FreeNodeList->next;
     THREAD_ATOMIC_END;
     return node;
   }
+  THREAD_ATOMIC_END;
 #endif
 
   node = (Node* )xmalloc(sizeof(Node));
@@ -1010,7 +1133,7 @@ initialize_cclass(CClassNode* cc)
 }
 
 static Node*
-node_new_cclass()
+node_new_cclass(void)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
@@ -1020,9 +1143,9 @@ node_new_cclass()
   return node;
 }
 
-extern Node*
+static Node*
 node_new_cclass_by_codepoint_range(int not,
-                   OnigCodePoint sbr[], OnigCodePoint mbr[])
+                   const OnigCodePoint sbr[], const OnigCodePoint mbr[])
 {
   CClassNode* cc;
   int n, i, j;
@@ -1079,7 +1202,7 @@ node_new_ctype(int type)
 }
 
 static Node*
-node_new_anychar()
+node_new_anychar(void)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
@@ -1128,7 +1251,11 @@ onig_node_new_anchor(int type)
 }
 
 static Node*
-node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env)
+node_new_backref(int back_num, int* backrefs, int by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+                int exist_level, int nest_level,
+#endif
+                ScanEnv* env)
 {
   int i;
   Node* node = node_new();
@@ -1141,6 +1268,13 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env)
   if (by_name != 0)
     NBACKREF(node).state |= NST_NAME_REF;
 
+#ifdef USE_BACKREF_AT_LEVEL
+  if (exist_level != 0) {
+    NBACKREF(node).state |= NST_NEST_LEVEL;
+    NBACKREF(node).nest_level  = nest_level;
+  }
+#endif
+
   for (i = 0; i < back_num; i++) {
     if (backrefs[i] <= env->num_mem &&
        IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
@@ -1184,21 +1318,27 @@ node_new_call(UChar* name, UChar* name_end)
 #endif
 
 static Node*
-node_new_qualifier(int lower, int upper, int by_number)
+node_new_quantifier(int lower, int upper, int by_number)
 {
   Node* node = node_new();
   CHECK_NULL_RETURN(node);
-  node->type = N_QUALIFIER;
-  NQUALIFIER(node).state  = 0;
-  NQUALIFIER(node).target = NULL;
-  NQUALIFIER(node).lower  = lower;
-  NQUALIFIER(node).upper  = upper;
-  NQUALIFIER(node).greedy = 1;
-  NQUALIFIER(node).by_number         = by_number;
-  NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
-  NQUALIFIER(node).head_exact        = NULL_NODE;
-  NQUALIFIER(node).next_head_exact   = NULL_NODE;
-  NQUALIFIER(node).is_refered        = 0;
+  node->type = N_QUANTIFIER;
+  NQUANTIFIER(node).state  = 0;
+  NQUANTIFIER(node).target = NULL;
+  NQUANTIFIER(node).lower  = lower;
+  NQUANTIFIER(node).upper  = upper;
+  NQUANTIFIER(node).greedy = 1;
+  NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY;
+  NQUANTIFIER(node).head_exact        = NULL_NODE;
+  NQUANTIFIER(node).next_head_exact   = NULL_NODE;
+  NQUANTIFIER(node).is_refered        = 0;
+  if (by_number != 0)
+    NQUANTIFIER(node).state |= NST_BY_NUMBER;
+
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  NQUANTIFIER(node).comb_exp_check_num = 0;
+#endif
+
   return node;
 }
 
@@ -1341,6 +1481,7 @@ onig_node_new_str(const UChar* s, const UChar* end)
   return node_new_str(s, end);
 }
 
+#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
 static Node*
 node_new_str_raw(UChar* s, UChar* end)
 {
@@ -1348,20 +1489,21 @@ node_new_str_raw(UChar* s, UChar* end)
   NSTRING_SET_RAW(node);
   return node;
 }
+#endif
 
 static Node*
-node_new_empty()
+node_new_empty(void)
 {
   return node_new_str(NULL, NULL);
 }
 
 static Node*
-node_new_str_raw_char(UChar c)
+node_new_str_char(UChar c)
 {
   UChar p[1];
 
   p[0] = c;
-  return node_new_str_raw(p, p + 1);
+  return node_new_str(p, p + 1);
 }
 
 static Node*
@@ -1391,6 +1533,24 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc)
   return 0;
 }
 
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+static int  /* moves the string num bytes to the right, fills the gap with val, returns 0 */
+node_str_head_pad(StrNode* sn, int num, UChar val)
+{
+  UChar buf[NODE_STR_BUF_SIZE];  /* NOTE(review): assumes the string fits in NODE_STR_BUF_SIZE -- confirm at call sites */
+  int i, len;
+
+  len = sn->end - sn->s;
+  onig_strcpy(buf, sn->s, sn->end);            /* save the current bytes */
+  onig_strcpy(&(sn->s[num]), buf, buf + len);  /* write them back num bytes further on */
+  sn->end += num;
+
+  for (i = 0; i < num; i++) sn->s[i] = val;
+
+  return 0;  /* declared int: give callers a defined value instead of falling off the end */
+}
+#endif
+
 extern int
 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
 {
@@ -1784,29 +1944,6 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
   return 0;
 }
 
-static int
-clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
-{
-  BBuf *tbuf;
-  int r;
-
-  if (IS_CCLASS_NOT(cc)) {
-    bitset_invert(cc->bs);
-
-    if (! ONIGENC_IS_SINGLEBYTE(enc)) {
-      r = not_code_range_buf(enc, cc->mbuf, &tbuf);
-      if (r != 0) return r;
-
-      bbuf_free(cc->mbuf);
-      cc->mbuf = tbuf;
-    }
-
-    CCLASS_CLEAR_NOT(cc);
-  }
-
-  return 0;
-}
-
 static int
 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
 {
@@ -1949,7 +2086,7 @@ conv_backslash_value(int c, ScanEnv* env)
 }
 
 static int
-is_invalid_qualifier_target(Node* node)
+is_invalid_quantifier_target(Node* node)
 {
   switch (NTYPE(node)) {
   case N_ANCHOR:
@@ -1958,19 +2095,19 @@ is_invalid_qualifier_target(Node* node)
 
   case N_EFFECT:
     if (NEFFECT(node).type == EFFECT_OPTION)
-      return is_invalid_qualifier_target(NEFFECT(node).target);
+      return is_invalid_quantifier_target(NEFFECT(node).target);
     break;
 
   case N_LIST: /* ex. (?:\G\A)* */
     do {
-      if (! is_invalid_qualifier_target(NCONS(node).left)) return 0;
+      if (! is_invalid_quantifier_target(NCONS(node).left)) return 0;
     } while (IS_NOT_NULL(node = NCONS(node).right));
     return 0;
     break;
 
   case N_ALT:  /* ex. (?:abc|\A)* */
     do {
-      if (is_invalid_qualifier_target(NCONS(node).left)) return 1;
+      if (is_invalid_quantifier_target(NCONS(node).left)) return 1;
     } while (IS_NOT_NULL(node = NCONS(node).right));
     break;
 
@@ -1982,7 +2119,7 @@ is_invalid_qualifier_target(Node* node)
 
 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
 static int
-popular_qualifier_num(QualifierNode* qf)
+popular_quantifier_num(QuantifierNode* qf)
 {
   if (qf->greedy) {
     if (qf->lower == 0) {
@@ -2013,7 +2150,7 @@ enum ReduceType {
   RQ_AQ,       /* to '*?'   */
   RQ_QQ,       /* to '??'   */
   RQ_P_QQ,     /* to '+)??' */
-  RQ_PQ_Q,     /* to '+?)?' */
+  RQ_PQ_Q      /* to '+?)?' */
 };
 
 static enum ReduceType ReduceTypeTable[6][6] = {
@@ -2026,15 +2163,15 @@ static enum ReduceType ReduceTypeTable[6][6] = {
 };
 
 extern void
-onig_reduce_nested_qualifier(Node* pnode, Node* cnode)
+onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
 {
   int pnum, cnum;
-  QualifierNode *p, *c;
+  QuantifierNode *p, *c;
 
-  p = &(NQUALIFIER(pnode));
-  c = &(NQUALIFIER(cnode));
-  pnum = popular_qualifier_num(p);
-  cnum = popular_qualifier_num(c);
+  p = &(NQUANTIFIER(pnode));
+  c = &(NQUANTIFIER(cnode));
+  pnum = popular_quantifier_num(p);
+  cnum = popular_quantifier_num(c);
 
   switch(ReduceTypeTable[cnum][pnum]) {
   case RQ_DEL:
@@ -2125,6 +2262,10 @@ typedef struct {
       int  ref1;
       int* refs;
       int  by_name;
+#ifdef USE_BACKREF_AT_LEVEL
+      int  exist_level;
+      int  level;   /* \k<name+n> */
+#endif
     } backref;
     struct {
       UChar* name;
@@ -2138,7 +2279,7 @@ typedef struct {
 
 
 static int
-fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
 {
   int low, up, syn_allow, non_low = 0;
   int r = 0;
@@ -2274,15 +2415,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
     control:
       if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
       PFETCH(c);
-      if (c == MC_ESC(enc)) {
-       v = fetch_escaped_value(&p, end, env);
-       if (v < 0) return v;
-        c = (OnigCodePoint )v;
-      }
-      else if (c == '?')
+      if (c == '?') {
        c = 0177;
-      else
+      }
+      else {
+        if (c == MC_ESC(enc)) {
+          v = fetch_escaped_value(&p, end, env);
+          if (v < 0) return v;
+          c = (OnigCodePoint )v;
+        }
        c &= 0x9f;
+      }
       break;
     }
     /* fall through */
@@ -2302,6 +2445,89 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
 
 #ifdef USE_NAMED_GROUP
+#ifdef USE_BACKREF_AT_LEVEL
+/* Scan the name in \k<name>, \k<name+n> or \k<name-n>.  Returns 1 when a level was given
+   (stored in *level), 0 when it was not, or an ONIGERR_* code (callers test r < 0).  On
+   success *src is moved past '>' and *rname_end points just past the last byte of the name. */
+static int
+fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end
+                     , ScanEnv* env, int* level)
+{
+  int r, exist_level = 0;
+  OnigCodePoint c = 0;
+  OnigCodePoint first_code;  /* first character of the name, re-checked at first_check */
+  OnigEncoding enc = env->enc;
+  UChar *name_end;
+  UChar *p = *src;
+  PFETCH_READY;
+
+  name_end = end;
+  r = 0;
+  if (PEND) {
+    return ONIGERR_EMPTY_GROUP_NAME;
+  }
+  else {
+    PFETCH(c);
+    first_code = c;
+    if (c == '>')
+      return ONIGERR_EMPTY_GROUP_NAME;
+
+    if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+    }
+  }
+
+  while (!PEND) {
+    name_end = p;  /* candidate end of name: the position of the character fetched next */
+    PFETCH(c);
+    if (c == '>' || c == ')' || c == '+' || c == '-') break;
+
+    if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+      r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;  /* remembered; scanning continues to find the name end */
+    }
+  }
+
+  if (c != '>') {
+    if (c == '+' || c == '-') {
+      int num, flag = (c == '-' ? -1 : 1);
+
+      if (PEND) goto err;  /* pattern stops right after the sign: nothing left to fetch */
+      PFETCH(c);
+      if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
+      PUNFETCH;            /* hand the digit back so the number scanner sees it */
+      num = onig_scan_unsigned_number(&p, end, enc);
+      if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+      *level = (num * flag);
+      exist_level = 1;
+      if (!PEND) {         /* the closing '>' may be missing at end of pattern */
+        PFETCH(c);
+        if (c == '>') goto first_check;
+      }
+    }
+
+  err:
+    r = ONIGERR_INVALID_GROUP_NAME;
+    name_end = end;  /* report everything up to the pattern end in the error string */
+  }
+  else {
+  first_check:
+    if (ONIGENC_IS_CODE_ASCII(first_code) &&
+        ONIGENC_IS_CODE_UPPER(enc, first_code))
+      r = ONIGERR_INVALID_GROUP_NAME;  /* a name may not begin with an ASCII upper-case letter */
+  }
+
+  if (r == 0) {
+    *rname_end = name_end;
+    *src = p;
+    return (exist_level ? 1 : 0);
+  }
+  else {
+    onig_scan_env_set_error_string(env, r, *src, name_end);
+    return r;
+  }
+}
+#endif /* USE_BACKREF_AT_LEVEL */
+
 /*
   def: 0 -> define name    (don't allow number name)
        1 -> reference name (allow number name)
@@ -2428,11 +2654,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c)
 
   if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
       IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
-    char buf[WARN_BUFSIZE];
+    UChar buf[WARN_BUFSIZE];
     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
                env->pattern, env->pattern_end,
-               "character class has '%s' without escape", c);
-    (*onig_warn)(buf);
+                (UChar* )"character class has '%s' without escape", c);
+    (*onig_warn)((char* )buf);
   }
 }
 
@@ -2442,11 +2668,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c)
   if (onig_warn == onig_null_warn) return ;
 
   if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
-    char buf[WARN_BUFSIZE];
+    UChar buf[WARN_BUFSIZE];
     onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc,
                (env)->pattern, (env)->pattern_end,
-               "regular expression has '%s' without escape", c);
-    (*onig_warn)(buf);
+               (UChar* )"regular expression has '%s' without escape", c);
+    (*onig_warn)((char* )buf);
   }
 }
 
@@ -2537,6 +2763,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
   tok->type = TK_CHAR;
   tok->base = 0;
   tok->u.c  = c;
+  tok->escaped = 0;
+
   if (c == ']') {
     tok->type = TK_CC_CLOSE;
   }
@@ -2708,7 +2936,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
        tok->type = TK_CC_CC_OPEN;
       }
       else {
-       CC_ESC_WARN(env, "[");
+       CC_ESC_WARN(env, (UChar* )"[");
       }
     }
   }
@@ -2747,7 +2975,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
   tok->backp = p;
 
   PFETCH(c);
-  if (c == MC_ESC(enc)) {
+  if (IS_MC_ESC_CODE(c, enc, syn)) {
     if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
 
     tok->backp = p;
@@ -2804,7 +3032,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 
     case '{':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
-      r = fetch_range_qualifier(&p, end, tok, env);
+      r = fetch_range_quantifier(&p, end, tok, env);
       if (r < 0) return r;  /* error */
       if (r == 0) goto greedy_check;
       else if (r == 2) { /* {n} */
@@ -3012,6 +3240,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
        tok->u.backref.num     = 1;
        tok->u.backref.ref1    = num;
        tok->u.backref.by_name = 0;
+#ifdef USE_BACKREF_AT_LEVEL
+       tok->u.backref.exist_level = 0;
+#endif
        break;
       }
 
@@ -3050,8 +3281,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
          int* backs;
 
          prev = p;
+
+#ifdef USE_BACKREF_AT_LEVEL
+         name_end = NULL_UCHARP; /* no need. escape gcc warning. */
+         r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level);
+         if (r == 1) tok->u.backref.exist_level = 1;
+         else        tok->u.backref.exist_level = 0;
+#else
          r = fetch_name(&p, end, &name_end, env, 1);
+#endif
          if (r < 0) return r;
+
          num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
          if (num <= 0) {
            onig_scan_env_set_error_string(env,
@@ -3170,13 +3410,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     switch (c) {
     case '.':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
+#ifdef USE_VARIABLE_META_CHARS
     any_char:
+#endif
       tok->type = TK_ANYCHAR;
       break;
 
     case '*':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
+#ifdef USE_VARIABLE_META_CHARS
     anytime:
+#endif
       tok->type = TK_OP_REPEAT;
       tok->u.repeat.lower = 0;
       tok->u.repeat.upper = REPEAT_INFINITE;
@@ -3185,7 +3429,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 
     case '+':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
+#ifdef USE_VARIABLE_META_CHARS
     one_or_more_time:
+#endif
       tok->type = TK_OP_REPEAT;
       tok->u.repeat.lower = 1;
       tok->u.repeat.upper = REPEAT_INFINITE;
@@ -3194,7 +3440,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 
     case '?':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
+#ifdef USE_VARIABLE_META_CHARS
     zero_or_one_time:
+#endif
       tok->type = TK_OP_REPEAT;
       tok->u.repeat.lower = 0;
       tok->u.repeat.upper = 1;
@@ -3203,7 +3451,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 
     case '{':
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
-      r = fetch_range_qualifier(&p, end, tok, env);
+      r = fetch_range_quantifier(&p, end, tok, env);
       if (r < 0) return r;  /* error */
       if (r == 0) goto greedy_check;
       else if (r == 2) { /* {n} */
@@ -3261,7 +3509,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
       if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
       tok->type = TK_ANCHOR;
       tok->u.subtype = (IS_SINGLELINE(env->option)
-                       ? ANCHOR_END_BUF : ANCHOR_END_LINE);
+                       ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
       break;
 
     case '[':
@@ -3271,7 +3519,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
 
     case ']':
       if (*src > env->pattern)   /* /].../ is allowed. */
-       CCEND_ESC_WARN(env, "]");
+       CCEND_ESC_WARN(env, (UChar* )"]");
       break;
 
     case '#':
@@ -3297,14 +3545,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
     }
   }
 
+#ifdef USE_VARIABLE_META_CHARS
  out:
+#endif
   *src = p;
   return tok->type;
 }
 
 static int
 add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
-                         OnigCodePoint sbr[], OnigCodePoint mbr[])
+                         const OnigCodePoint sbr[], const OnigCodePoint mbr[])
 {
   int i, r;
   OnigCodePoint j;
@@ -3368,7 +3618,7 @@ static int
 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
 {
   int c, r;
-  OnigCodePoint *sbr, *mbr;
+  const OnigCodePoint *sbr, *mbr;
   OnigEncoding enc = env->enc;
 
   r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
@@ -3506,19 +3756,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
 #define POSIX_BRACKET_NAME_MAX_LEN         6
 
   static PosixBracketEntryType PBS[] = {
-    { "alnum",  ONIGENC_CTYPE_ALNUM,  5 },
-    { "alpha",  ONIGENC_CTYPE_ALPHA,  5 },
-    { "blank",  ONIGENC_CTYPE_BLANK,  5 },
-    { "cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
-    { "digit",  ONIGENC_CTYPE_DIGIT,  5 },
-    { "graph",  ONIGENC_CTYPE_GRAPH,  5 },
-    { "lower",  ONIGENC_CTYPE_LOWER,  5 },
-    { "print",  ONIGENC_CTYPE_PRINT,  5 },
-    { "punct",  ONIGENC_CTYPE_PUNCT,  5 },
-    { "space",  ONIGENC_CTYPE_SPACE,  5 },
-    { "upper",  ONIGENC_CTYPE_UPPER,  5 },
-    { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
-    { "ascii",  ONIGENC_CTYPE_ASCII,  5 }, /* I don't know origin. Perl? */
+    { (UChar* )"alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { (UChar* )"alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { (UChar* )"blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { (UChar* )"cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { (UChar* )"digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { (UChar* )"graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { (UChar* )"lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { (UChar* )"print",  ONIGENC_CTYPE_PRINT,  5 },
+    { (UChar* )"punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { (UChar* )"space",  ONIGENC_CTYPE_SPACE,  5 },
+    { (UChar* )"upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { (UChar* )"ascii",  ONIGENC_CTYPE_ASCII,  5 },
     { (UChar* )NULL, -1, 0 }
   };
 
@@ -3542,7 +3792,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
   for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
     if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
       p = (UChar* )onigenc_step(enc, p, end, pb->len);
-      if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0)
+      if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
        return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
 
       r = add_ctype_to_cc(cc, pb->ctype, not, env);
@@ -3577,19 +3827,19 @@ static int
 property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc)
 {
   static PosixBracketEntryType PBS[] = {
-    { "Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
-    { "Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
-    { "Blank",  ONIGENC_CTYPE_BLANK,  5 },
-    { "Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
-    { "Digit",  ONIGENC_CTYPE_DIGIT,  5 },
-    { "Graph",  ONIGENC_CTYPE_GRAPH,  5 },
-    { "Lower",  ONIGENC_CTYPE_LOWER,  5 },
-    { "Print",  ONIGENC_CTYPE_PRINT,  5 },
-    { "Punct",  ONIGENC_CTYPE_PUNCT,  5 },
-    { "Space",  ONIGENC_CTYPE_SPACE,  5 },
-    { "Upper",  ONIGENC_CTYPE_UPPER,  5 },
-    { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
-    { "ASCII",  ONIGENC_CTYPE_ASCII,  5 },
+    { (UChar* )"Alnum",  ONIGENC_CTYPE_ALNUM,  5 },
+    { (UChar* )"Alpha",  ONIGENC_CTYPE_ALPHA,  5 },
+    { (UChar* )"Blank",  ONIGENC_CTYPE_BLANK,  5 },
+    { (UChar* )"Cntrl",  ONIGENC_CTYPE_CNTRL,  5 },
+    { (UChar* )"Digit",  ONIGENC_CTYPE_DIGIT,  5 },
+    { (UChar* )"Graph",  ONIGENC_CTYPE_GRAPH,  5 },
+    { (UChar* )"Lower",  ONIGENC_CTYPE_LOWER,  5 },
+    { (UChar* )"Print",  ONIGENC_CTYPE_PRINT,  5 },
+    { (UChar* )"Punct",  ONIGENC_CTYPE_PUNCT,  5 },
+    { (UChar* )"Space",  ONIGENC_CTYPE_SPACE,  5 },
+    { (UChar* )"Upper",  ONIGENC_CTYPE_UPPER,  5 },
+    { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 },
+    { (UChar* )"ASCII",  ONIGENC_CTYPE_ASCII,  5 },
     { (UChar* )NULL, -1, 0 }
   };
 
@@ -3839,7 +4089,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
                            *src, env->pattern_end, 1, env->enc))
       return ONIGERR_EMPTY_CHAR_CLASS;
 
-    CC_ESC_WARN(env, "]");
+    CC_ESC_WARN(env, (UChar* )"]");
     r = tok->type = TK_CHAR;  /* allow []...] */
   }
 
@@ -3942,7 +4192,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
       r = parse_posix_bracket(cc, &p, end, env);
       if (r < 0) goto err;
       if (r == 1) {  /* is not POSIX bracket */
-       CC_ESC_WARN(env, "[");
+       CC_ESC_WARN(env, (UChar* )"[");
        p = tok->backp;
        v = (OnigCodePoint )tok->u.c;
        in_israw = 0;
@@ -3988,7 +4238,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
          goto val_entry;
        }
        else if (r == TK_CC_AND) {
-         CC_ESC_WARN(env, "-");
+         CC_ESC_WARN(env, (UChar* )"-");
          goto range_end_val;
        }
        state = CCS_RANGE;
@@ -4003,12 +4253,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
        fetched = 1;
        /* [--x] or [a&&-x] is warned. */
        if (r == TK_CC_RANGE || and_start != 0)
-         CC_ESC_WARN(env, "-");
+         CC_ESC_WARN(env, (UChar* )"-");
 
        goto val_entry;
       }
       else if (state == CCS_RANGE) {
-       CC_ESC_WARN(env, "-");
+       CC_ESC_WARN(env, (UChar* )"-");
        goto sb_char;  /* [!--x] is allowed */
       }
       else { /* CCS_COMPLETE */
@@ -4017,12 +4267,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
        fetched = 1;
        if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
        else if (r == TK_CC_AND) {
-         CC_ESC_WARN(env, "-");
+         CC_ESC_WARN(env, (UChar* )"-");
          goto range_end_val;
        }
        
        if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
-         CC_ESC_WARN(env, "-");
+         CC_ESC_WARN(env, (UChar* )"-");
          goto sb_char;   /* [0-9-a] is allowed as [0-9\-a] */
        }
        r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
@@ -4326,10 +4576,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
     }
   }
   else {
-#ifdef USE_NAMED_GROUP
     if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
       goto group;
-#endif
+
     *np = node_new_effect_memory(env->option, 0);
     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
     num = scan_env_add_mem_entry(env);
@@ -4358,20 +4607,20 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
   return 0;
 }
 
-static char* PopularQStr[] = {
+static const char* PopularQStr[] = {
   "?", "*", "+", "??", "*?", "+?"
 };
 
-static char* ReduceQStr[] = {
+static const char* ReduceQStr[] = {
   "", "", "*", "*?", "??", "+ and ??", "+? and ?"
 };
 
 static int
-set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
+set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
 {
-  QualifierNode* qn;
+  QuantifierNode* qn;
 
-  qn = &(NQUALIFIER(qnode));
+  qn = &(NQUANTIFIER(qnode));
   if (qn->lower == 1 && qn->upper == 1) {
     return 1;
   }
@@ -4390,19 +4639,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
     }
     break;
 
-  case N_QUALIFIER:
+  case N_QUANTIFIER:
     { /* check redundant double repeat. */
       /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
-      QualifierNode* qnt = &(NQUALIFIER(target));
+      QuantifierNode* qnt = &(NQUANTIFIER(target));
+      int nestq_num   = popular_quantifier_num(qn);
+      int targetq_num = popular_quantifier_num(qnt);
 
 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
-      if (qn->by_number == 0 && qnt->by_number == 0 &&
+      if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
          IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
-        int nestq_num, targetq_num;
-        char buf[WARN_BUFSIZE];
-
-        nestq_num   = popular_qualifier_num(qn);
-        targetq_num = popular_qualifier_num(qnt);
+        UChar buf[WARN_BUFSIZE];
 
         switch(ReduceTypeTable[targetq_num][nestq_num]) {
         case RQ_ASIS:
@@ -4411,9 +4658,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
         case RQ_DEL:
           if (onig_verb_warn != onig_null_warn) {
             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
-                                       env->pattern, env->pattern_end,
-                                       "redundant nested repeat operator");
-            (*onig_verb_warn)(buf);
+                                 env->pattern, env->pattern_end,
+                                 (UChar* )"redundant nested repeat operator");
+            (*onig_verb_warn)((char* )buf);
           }
           goto warn_exit;
           break;
@@ -4422,10 +4669,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
           if (onig_verb_warn != onig_null_warn) {
             onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
                                        env->pattern, env->pattern_end,
-            "nested repeat operator %s and %s was replaced with '%s'",
+            (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
             PopularQStr[targetq_num], PopularQStr[nestq_num],
             ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
-            (*onig_verb_warn)(buf);
+            (*onig_verb_warn)((char* )buf);
           }
           goto warn_exit;
           break;
@@ -4434,9 +4681,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
 
     warn_exit:
 #endif
-      if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) {
-       onig_reduce_nested_qualifier(qnode, target);
-       goto q_exit;
+      if (targetq_num >= 0) {
+       if (nestq_num >= 0) {
+         onig_reduce_nested_quantifier(qnode, target);
+         goto q_exit;
+       }
+       else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
+         /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
+         if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+           qn->upper = (qn->lower == 0 ? 1 : qn->lower);
+         }
+       }
       }
     }
     break;
@@ -4450,61 +4705,6 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env)
   return 0;
 }
 
-static int
-make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
-                               CClassNode* cc, Node** root)
-{
-  int r, i, j, k, clen, len, ncode, n;
-  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
-  Node **ptail, *snode = NULL_NODE;
-  OnigCompAmbigCodes* ccs;
-  OnigCompAmbigCodeItem* ci;
-  OnigAmbigType amb;
-
-  n = 0;
-  *root = NULL_NODE;
-  ptail = root;
-
-
-  for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
-    if ((amb & ambig_flag) == 0)  continue;
-
-    ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs);
-    for (i = 0; i < ncode; i++) {
-      if (onig_is_code_in_cc(enc, ccs[i].code, cc)) {
-        for (j = 0; j < ccs[i].n; j++) {
-          ci = &(ccs[i].items[j]);
-          if (ci->len > 1) { /* compound only */
-            if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
-
-            clen = ci->len;
-            for (k = 0; k < clen; k++) {
-              len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf);
-
-              if (k == 0) {
-                snode = node_new_str_raw(buf, buf + len);
-                CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY);
-              }
-              else {
-                r = onig_node_str_cat(snode, buf, buf + len);
-                if (r < 0) return r;
-              }
-            }
-
-            *ptail = node_new_alt(snode, NULL_NODE);
-            CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY);
-            ptail = &(NCONS(*ptail).right);
-            n++;
-        }
-        }
-      }
-    }
-  }
-
-  return n;
-}
-
-
 #ifdef USE_SHARED_CCLASS_TABLE
 
 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS     8
@@ -4546,27 +4746,9 @@ static int type_cclass_hash(type_cclass_key* key)
   return val + (val >> 5);
 }
 
-static int type_cclass_key_free(st_data_t x)
-{
-  xfree((void* )x);
-  return 0;
-}
-
-static st_data_t type_cclass_key_clone(st_data_t x)
-{
-  type_cclass_key* new_key;
-  type_cclass_key* key = (type_cclass_key* )x;
-
-  new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
-  *new_key = *key;
-  return (st_data_t )new_key;
-}
-
 static struct st_hash_type type_type_cclass_hash = {
     type_cclass_cmp,
     type_cclass_hash,
-    type_cclass_key_free,
-    type_cclass_key_clone
 };
 
 static st_table* OnigTypeCClassTable;
@@ -4580,14 +4762,18 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
     if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
     xfree(node);
   }
+
+  if (IS_NOT_NULL(key)) xfree(key);
   return ST_DELETE;
 }
 
 extern int
-onig_free_shared_cclass_table()
+onig_free_shared_cclass_table(void)
 {
   if (IS_NOT_NULL(OnigTypeCClassTable)) {
     onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
+    onig_st_free_table(OnigTypeCClassTable);
+    OnigTypeCClassTable = NULL;
   }
 
   return 0;
@@ -4667,23 +4853,36 @@ parse_exp(Node** np, OnigToken* tok, int term,
   case TK_RAW_BYTE:
   tk_raw_byte:
     {
-      *np = node_new_str_raw_char((UChar )tok->u.c);
+      *np = node_new_str_char((UChar )tok->u.c);
       CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
       len = 1;
       while (1) {
+       if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
+         if (len == enc_len(env->enc, NSTRING(*np).s)) {
+           r = fetch_token(tok, src, end, env);
+           goto string_end;
+         }
+       }
+
        r = fetch_token(tok, src, end, env);
        if (r < 0) return r;
        if (r != TK_RAW_BYTE) {
-#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
-         if (len >= enc_len(env->enc, NSTRING(*np).s)) {
-           NSTRING_CLEAR_RAW(*np);
+#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
+         int rem;
+         if (len < ONIGENC_MBC_MINLEN(env->enc)) {
+           rem = ONIGENC_MBC_MINLEN(env->enc) - len;
+           (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0);
+           if (len + rem == enc_len(env->enc, NSTRING(*np).s)) {
+             goto string_end;
+           }
          }
 #endif
-          goto string_end;
+         return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
        }
 
        r = node_str_cat_char(*np, (UChar )tok->u.c);
        if (r < 0) return r;
+
        len++;
       }
     }
@@ -4741,7 +4940,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
          int ctype, not;
 
 #ifdef USE_SHARED_CCLASS_TABLE
-          OnigCodePoint *sbr, *mbr;
+          const OnigCodePoint *sbr, *mbr;
 
          ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
           r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
@@ -4823,7 +5022,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
 
       if (IS_IGNORECASE(env->option)) {
         int i, n, in_cc;
-        OnigPairAmbigCodes* ccs;
+        const OnigPairAmbigCodes* ccs;
         BitSetRef bs = cc->bs;
         OnigAmbigType amb;
 
@@ -4854,24 +5053,6 @@ parse_exp(Node** np, OnigToken* tok, int term,
           }
         }
       }
-
-      if (IS_IGNORECASE(env->option) &&
-          (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
-        int res;
-        Node *alt_root, *work;
-
-        res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc,
-                                             cc, &alt_root);
-        if (res < 0) return res;
-        if (res > 0) {
-          work = node_new_alt(*np, alt_root);
-          if (IS_NULL(work)) {
-            onig_node_free(alt_root);
-            return ONIGERR_MEMORY;
-          }
-          *np = work;
-        }
-      }
     }
     break;
 
@@ -4883,17 +5064,22 @@ parse_exp(Node** np, OnigToken* tok, int term,
   case TK_ANYCHAR_ANYTIME:
     *np = node_new_anychar();
     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
-    qn = node_new_qualifier(0, REPEAT_INFINITE, 0);
+    qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
     CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
-    NQUALIFIER(qn).target = *np;
+    NQUANTIFIER(qn).target = *np;
     *np = qn;
     break;
 
   case TK_BACKREF:
     len = tok->u.backref.num;
     *np = node_new_backref(len,
-              (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
-               tok->u.backref.by_name, env);
+                  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
+                          tok->u.backref.by_name,
+#ifdef USE_BACKREF_AT_LEVEL
+                          tok->u.backref.exist_level,
+                          tok->u.backref.level,
+#endif
+                          env);
     CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
     break;
 
@@ -4936,14 +5122,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
 
   repeat:
     if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
-      if (is_invalid_qualifier_target(*targetp))
+      if (is_invalid_quantifier_target(*targetp))
        return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
 
-      qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper,
+      qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
                              (r == TK_INTERVAL ? 1 : 0));
       CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY);
-      NQUALIFIER(qn).greedy = tok->u.repeat.greedy;
-      r = set_qualifier(qn, *targetp, group, env);
+      NQUANTIFIER(qn).greedy = tok->u.repeat.greedy;
+      r = set_quantifier(qn, *targetp, group, env);
       if (r < 0) return r;
       
       if (tok->u.repeat.possessive != 0) {
index 1a4ac7dea24879f6dae7c6581f7ffb4649ad2f9a..b25618a33f996a12bbea079262dc245b2cd85445 100644 (file)
@@ -4,7 +4,7 @@
   regparse.h -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,7 @@
 #define N_CTYPE        (1<< 2)
 #define N_ANYCHAR      (1<< 3)
 #define N_BACKREF      (1<< 4)
-#define N_QUALIFIER    (1<< 5)
+#define N_QUANTIFIER   (1<< 5)
 #define N_EFFECT       (1<< 6)
 #define N_ANCHOR       (1<< 7)
 #define N_LIST         (1<< 8)
@@ -52,7 +52,7 @@
 #define NSTRING(node)      ((node)->u.str)
 #define NCCLASS(node)      ((node)->u.cclass)
 #define NCTYPE(node)       ((node)->u.ctype)
-#define NQUALIFIER(node)   ((node)->u.qualifier)
+#define NQUANTIFIER(node)  ((node)->u.quantifier)
 #define NANCHOR(node)      ((node)->u.anchor)
 #define NBACKREF(node)     ((node)->u.backref)
 #define NEFFECT(node)      ((node)->u.effect)
@@ -67,7 +67,7 @@
 #define CTYPE_XDIGIT            (1<<6)
 #define CTYPE_NOT_XDIGIT        (1<<7)
 
-#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL)
+#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
 #define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
 
 #define EFFECT_MEMORY           (1<<0)
@@ -76,7 +76,7 @@
 
 #define NODE_STR_MARGIN         16
 #define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
-#define NODE_BACKREFS_SIZE       7
+#define NODE_BACKREFS_SIZE       6
 
 #define NSTR_RAW                (1<<0) /* by backslashed number */
 #define NSTR_AMBIG              (1<<1)
@@ -124,12 +124,14 @@ typedef struct {
   int lower;
   int upper;
   int greedy;
-  int by_number;         /* {n,m} */
   int target_empty_info;
   struct _Node* head_exact;
   struct _Node* next_head_exact;
   int is_refered;     /* include called node. don't eliminate even if {0} */
-} QualifierNode;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  int comb_exp_check_num;  /* 1,2,3...: check,  0: no check  */
+#endif
+} QuantifierNode;
 
 /* status bits */
 #define NST_MIN_FIXED             (1<<0)
@@ -145,6 +147,8 @@ typedef struct {
 #define NST_NAMED_GROUP           (1<<10)
 #define NST_NAME_REF              (1<<11)
 #define NST_IN_REPEAT             (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_NEST_LEVEL            (1<<13)
+#define NST_BY_NUMBER             (1<<14) /* {n,m} */
 
 #define SET_EFFECT_STATUS(node,f)      (node)->u.effect.state |=  (f)
 #define CLEAR_EFFECT_STATUS(node,f)    (node)->u.effect.state &= ~(f)
@@ -165,7 +169,9 @@ typedef struct {
 #define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
 #define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
 #define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
-#define IS_QUALIFIER_IN_REPEAT(qn)     (((qn)->state & NST_IN_REPEAT)  != 0)
+#define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0)
+#define IS_QUANTIFIER_IN_REPEAT(qn)     (((qn)->state & NST_IN_REPEAT)  != 0)
+#define IS_QUANTIFIER_BY_NUMBER(qn)     (((qn)->state & NST_BY_NUMBER)  != 0)
 
 typedef struct {
   int state;
@@ -212,6 +218,7 @@ typedef struct {
   int     back_num;
   int     back_static[NODE_BACKREFS_SIZE];
   int*    back_dynamic;
+  int     nest_level;
 } BackrefNode;
 
 typedef struct {
@@ -223,15 +230,15 @@ typedef struct {
 typedef struct _Node {
   int type;
   union {
-    StrNode       str;
-    CClassNode    cclass;
-    QualifierNode qualifier;
-    EffectNode    effect;
+    StrNode        str;
+    CClassNode     cclass;
+    QuantifierNode quantifier;
+    EffectNode     effect;
 #ifdef USE_SUBEXP_CALL
-    CallNode      call;
+    CallNode       call;
 #endif
-    BackrefNode   backref;
-    AnchorNode    anchor;
+    BackrefNode    backref;
+    AnchorNode     anchor;
     struct {
       struct _Node* left;
       struct _Node* right;
@@ -274,6 +281,12 @@ typedef struct {
   int             mem_alloc;
   Node*           mem_nodes_static[SCANENV_MEMNODES_SIZE];
   Node**          mem_nodes_dynamic;
+#ifdef USE_COMBINATION_EXPLOSION_CHECK
+  int num_comb_exp_check;
+  int comb_exp_max_regnum;
+  int curr_max_regnum;
+  int has_recursion;
+#endif
 } ScanEnv;
 
 
@@ -290,11 +303,10 @@ typedef struct {
 extern int    onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
 #endif
 
-extern int    onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
 extern int    onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
 extern void   onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
 extern int    onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
-extern void   onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode));
+extern void   onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
 extern void   onig_node_conv_to_str_node P_((Node* node, int raw));
 extern int    onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
 extern void   onig_node_free P_((Node* node));
@@ -303,7 +315,7 @@ extern Node*  onig_node_new_anchor P_((int type));
 extern Node*  onig_node_new_str P_((const UChar* s, const UChar* end));
 extern Node*  onig_node_new_list P_((Node* left, Node* right));
 extern void   onig_node_str_clear P_((Node* node));
-extern int    onig_free_node_list();
+extern int    onig_free_node_list P_((void));
 extern int    onig_names_free P_((regex_t* reg));
 extern int    onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
 
index 34cbeb9a46f065abb0565c0b154562d8ec8772c4..a3bacf722e8638ceb6f4386922bd2ebe720369d0 100644 (file)
@@ -2,7 +2,7 @@
   regposix.c - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -55,7 +55,7 @@ typedef struct {
 static int
 onig2posix_error_code(int code)
 {
-  static O2PERR o2p[] = {
+  static const O2PERR o2p[] = {
     { ONIG_MISMATCH,                                      REG_NOMATCH },
     { ONIG_NO_SUPPORT_CONFIG,                             REG_EONIG_INTERNAL },
     { ONIGERR_MEMORY,                                     REG_ESPACE  },
@@ -192,7 +192,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
   ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
   end = (UChar* )(str + len);
   r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
-                 (OnigRegion* )pmatch, options);
+                 (OnigRegion* )pm, options);
 
   if (r >= 0) {
     r = 0; /* Match */
@@ -212,6 +212,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
   if (pm != pmatch && pm != NULL)
     xfree(pm);
 
+#if 0
+  if (reg->re_nsub > nmatch - 1)
+    reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
+#endif
+
   return r;
 }
 
index a0f36b8c33dda15d2c6dd8eab4a7624e3d20e609..9114e39e6b3f3f8fd181aea249c4196233ae8edb 100644 (file)
@@ -2,7 +2,7 @@
   regsyntax.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2004  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
 
 #include "regint.h"
 
+OnigSyntaxType OnigSyntaxASIS = {     /* "as is" syntax: no ONIG_SYN_OP_* operator enabled (presumably literal matching -- confirm in the Oniguruma API doc) */
+    0                                 /* 1st field (holds the ONIG_SYN_OP_* set in the sibling tables): empty */
+  , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE   /* 2nd field (ONIG_SYN_OP2_* set): this single flag */
+  , 0                                 /* 3rd field (behavior flags in the sibling tables): none */
+  , ONIG_OPTION_NONE                  /* 4th field: default option set */
+};
+
 OnigSyntaxType OnigSyntaxPosixBasic = {
   ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
     ONIG_SYN_OP_ESC_BRACE_INTERVAL )
@@ -63,7 +70,7 @@ OnigSyntaxType OnigSyntaxEmacs = {
 
 OnigSyntaxType OnigSyntaxGrep = {
   ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET |
-    ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
+    ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP |
     ONIG_SYN_OP_ESC_VBAR_ALT |
     ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF |
     ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR |
@@ -110,6 +117,28 @@ OnigSyntaxType OnigSyntaxPerl = {
   , ONIG_OPTION_SINGLELINE
 };
 
+/* Perl + named group */
+OnigSyntaxType OnigSyntaxPerl_NG = {
+  (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |   /* 1st field: ONIG_SYN_OP_* operator set */
+     ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
+     ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+     ONIG_SYN_OP_ESC_C_CONTROL )
+   & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )   /* clear this one operator from the set OR-ed together above */
+  , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |      /* 2nd field: ONIG_SYN_OP2_* operator set */
+      ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
+      ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY  |
+      ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
+      ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS    |
+      ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP       |   /* named-group flag */
+      ONIG_SYN_OP2_ESC_K_NAMED_BACKREF        |   /* named-backref flag */
+      ONIG_SYN_OP2_ESC_G_SUBEXP_CALL )
+  , ( SYN_GNU_REGEX_BV |                      /* 3rd field: behavior flags */
+      ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
+      ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME )
+  , ONIG_OPTION_SINGLELINE                    /* 4th field: default option set */
+};
+
+
 
 extern int
 onig_set_default_syntax(OnigSyntaxType* syntax)
index 5f15c10e65204dfa27ba5ac6f23cf6713a443b0d..5fad0cc18c337c2212cea9922c140b75c8ba6e99 100644 (file)
@@ -2,7 +2,7 @@
   regversion.c -  Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2002-2005  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2006  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -47,7 +47,7 @@ onig_copyright(void)
 {
   static char s[58];
 
-  sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako",
+  sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako",
           ONIGURUMA_VERSION_MAJOR,
           ONIGURUMA_VERSION_MINOR,
           ONIGURUMA_VERSION_TEENY);
index 65c2cc58bd797be3ea039d8905b25ef8a028ea25..2324da263515f48e31c2b7d074e3fb67d71fda46 100644 (file)
@@ -56,8 +56,6 @@ static int numhash(long);
 static struct st_hash_type type_numhash = {
     numcmp,
     numhash,
-    st_nothing_key_free,
-    st_nothing_key_clone
 };
 
 /* extern int strcmp(const char *, const char *); */
@@ -65,20 +63,6 @@ static int strhash(const char *);
 static struct st_hash_type type_strhash = {
     strcmp,
     strhash,
-    st_nothing_key_free,
-    st_nothing_key_clone
-};
-
-static int strend_cmp(st_strend_key*, st_strend_key*);
-static int strend_hash(st_strend_key*);
-static int strend_key_free(st_data_t key);
-static st_data_t strend_key_clone(st_data_t x);
-
-static struct st_hash_type type_strend_hash = {
-    strend_cmp,
-    strend_hash,
-    strend_key_free,
-    strend_key_clone
 };
 
 static void rehash(st_table *);
@@ -100,7 +84,7 @@ static void rehash(st_table *);
 /*
 Table of prime numbers 2^n+a, 2<=n<=30.
 */
-static long primes[] = {
+static const long primes[] = {
        8 + 3,
        16 + 3,
        32 + 5,
@@ -228,13 +212,6 @@ st_init_strtable_with_size(size)
     return st_init_table_with_size(&type_strhash, size);
 }
 
-st_table*
-st_init_strend_table_with_size(size)
-    int size;
-{
-    return st_init_table_with_size(&type_strend_hash, size);
-}
-
 void
 st_free_table(table)
     st_table *table;
@@ -246,7 +223,6 @@ st_free_table(table)
        ptr = table->bins[i];
        while (ptr != 0) {
            next = ptr->next;
-            table->type->key_free(ptr->key);
            free(ptr);
            ptr = next;
        }
@@ -297,21 +273,6 @@ st_lookup(table, key, value)
     }
 }
 
-int
-st_lookup_strend(table, str_key, end_key, value)
-    st_table *table;
-    const unsigned char* str_key;
-    const unsigned char* end_key;
-    st_data_t *value;
-{
-  st_strend_key key;
-
-  key.s   = (unsigned char* )str_key;
-  key.end = (unsigned char* )end_key;
-
-  return st_lookup(table, (st_data_t )(&key), value);
-}
-
 #define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
 do {\
     st_table_entry *entry;\
@@ -352,22 +313,6 @@ st_insert(table, key, value)
     }
 }
 
-int
-st_insert_strend(table, str_key, end_key, value)
-     st_table *table;
-     const unsigned char* str_key;
-     const unsigned char* end_key;
-     st_data_t value;
-{
-  st_strend_key* key;
-
-  key = alloc(st_strend_key);
-  key->s   = (unsigned char* )str_key;
-  key->end = (unsigned char* )end_key;
-
-  return st_insert(table, (st_data_t )key, value);
-}
-
 void
 st_add_direct(table, key, value)
     st_table *table;
@@ -381,21 +326,6 @@ st_add_direct(table, key, value)
     ADD_DIRECT(table, key, value, hash_val, bin_pos);
 }
 
-void
-st_add_direct_strend(table, str_key, end_key, value)
-    st_table *table;
-    const unsigned char* str_key;
-    const unsigned char* end_key;
-    st_data_t value;
-{
-  st_strend_key* key;
-
-  key = alloc(st_strend_key);
-  key->s   = (unsigned char* )str_key;
-  key->end = (unsigned char* )end_key;
-  st_add_direct(table, (st_data_t )key, value);
-}
-
 static void
 rehash(table)
     register st_table *table;
@@ -455,7 +385,6 @@ st_copy(old_table)
                return 0;
            }
            *entry = *ptr;
-            entry->key  = old_table->type->key_clone(ptr->key);
            entry->next = new_table->bins[i];
            new_table->bins[i] = entry;
            ptr = ptr->next;
@@ -556,7 +485,7 @@ st_cleanup_safe(table, never)
     table->num_entries = num_entries;
 }
 
-void
+int
 st_foreach(table, func, arg)
     st_table *table;
     int (*func)();
@@ -569,7 +498,7 @@ st_foreach(table, func, arg)
     for(i = 0; i < table->num_bins; i++) {
        last = 0;
        for(ptr = table->bins[i]; ptr != 0;) {
-           retval = (*func)(ptr->key, ptr->record, arg, 0);
+           retval = (*func)(ptr->key, ptr->record, arg);
            switch (retval) {
            case ST_CHECK:      /* check if hash is modified during iteration */
                tmp = 0;
@@ -580,8 +509,7 @@ st_foreach(table, func, arg)
                }
                if (!tmp) {
                    /* call func with error notice */
-                   retval = (*func)(0, 0, arg, 1);
-                   return;
+                   return 1;
                }
                /* fall through */
            case ST_CONTINUE:
@@ -589,7 +517,7 @@ st_foreach(table, func, arg)
                ptr = ptr->next;
                break;
            case ST_STOP:
-               return;
+               return 0;
            case ST_DELETE:
                tmp = ptr;
                if (last == 0) {
@@ -599,12 +527,12 @@ st_foreach(table, func, arg)
                    last->next = ptr->next;
                }
                ptr = ptr->next;
-                table->type->key_free(tmp->key);
                free(tmp);
                table->num_entries--;
            }
        }
     }
+    return 0;
 }
 
 static int
@@ -659,59 +587,3 @@ numhash(n)
 {
     return n;
 }
-
-extern int
-st_nothing_key_free(st_data_t key) { return 0; }
-
-extern st_data_t
-st_nothing_key_clone(st_data_t x) { return x; } 
-
-static int strend_cmp(st_strend_key* x, st_strend_key* y)
-{
-  unsigned char *p, *q;
-  int c;
-
-  if ((x->end - x->s) != (y->end - y->s))
-    return 1;
-
-  p = x->s;
-  q = y->s;
-  while (p < x->end) {
-    c = (int )*p - (int )*q;
-    if (c != 0) return c;
-
-    p++; q++;
-  }
-
-  return 0;
-}
-
-static int strend_hash(st_strend_key* x)
-{
-  int val;
-  unsigned char *p;
-
-  val = 0;
-  p = x->s;
-  while (p < x->end) {
-    val = val * 997 + (int )*p++;
-  }
-
-  return val + (val >> 5);
-}
-
-static int strend_key_free(st_data_t x)
-{
-  xfree((void* )x);
-  return 0;
-}
-
-static st_data_t strend_key_clone(st_data_t x)
-{
-  st_strend_key* new_key;
-  st_strend_key* key = (st_strend_key* )x;
-
-  new_key = alloc(st_strend_key);
-  *new_key = *key;
-  return (st_data_t )new_key;
-}
index c5cc4e625e08df72adf3ad84b9bfd846259849cb..da65e7fef83049c067e47ef10fad118f23693c03 100644 (file)
@@ -14,8 +14,6 @@ typedef struct st_table st_table;
 struct st_hash_type {
     int (*compare)();
     int (*hash)();
-    int (*key_free)();
-    st_data_t (*key_clone)();
 };
 
 struct st_table {
@@ -25,11 +23,6 @@ struct st_table {
     struct st_table_entry **bins;
 };
 
-typedef struct {
-  unsigned char* s;
-  unsigned char* end;
-} st_strend_key;
-
 #define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
 
 enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
@@ -51,23 +44,16 @@ st_table *st_init_numtable _((void));
 st_table *st_init_numtable_with_size _((int));
 st_table *st_init_strtable _((void));
 st_table *st_init_strtable_with_size _((int));
-st_table *st_init_strend_table_with_size _((int));
 int st_delete _((st_table *, st_data_t *, st_data_t *));
 int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
 int st_insert _((st_table *, st_data_t, st_data_t));
-int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
 int st_lookup _((st_table *, st_data_t, st_data_t *));
-int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*));
-void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
+int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
 void st_add_direct _((st_table *, st_data_t, st_data_t));
-void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t));
 void st_free_table _((st_table *));
 void st_cleanup_safe _((st_table *, st_data_t));
 st_table *st_copy _((st_table *));
 
-extern st_data_t st_nothing_key_clone _((st_data_t key));
-extern int st_nothing_key_free _((st_data_t key));
-
 #define ST_NUMCMP      ((int (*)()) 0)
 #define ST_NUMHASH     ((int (*)()) -2)
 
index d868ab384356b2114591027986f3bd1a6173e771..67efd96acbafcf7d6518bd6983e6f3e302790e51 100644 (file)
@@ -718,6 +718,13 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
                convert_to_string_ex(arg_pattern);
                /* don't bother doing an extended regex with just a number */
        }
+
+       if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
+               RETVAL_FALSE;
+               goto out;
+       }
+
        re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
        if (re == NULL) {
                RETVAL_FALSE;
index 406069576e1c30d30f967737781f640585b5ebdc..2abad1330bfb80152e50be1c0fc70f8d6233fbd9 100644 (file)
@@ -98,7 +98,6 @@ static int prop_lookup(unsigned long code, unsigned long n)
 
 }
 
-
 MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1,
                unsigned long mask2)
 {
@@ -277,6 +276,11 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, s
        size_t i;
        enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding);
 
+       if (_src_encoding == mbfl_no_encoding_invalid) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", src_encoding);
+               return NULL;
+       }       
+
        unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len TSRMLS_CC);
        if (unicode == NULL)
                return NULL;
index 9484a47849c9ad91aaa465522d564b970c5986e5..39a39f64250c5cc9ed477c8d1c811c801ffe877b 100644 (file)
@@ -39,83 +39,91 @@ foreach($inputs as $input) {
 ?>
 
 --EXPECTF---- 
+
 -- Iteration 1 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 2 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 3 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 4 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 5 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 6 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 7 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
 
 -- Iteration 8 --
 Without $regs arg:
-int(1)
+
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
 With $regs arg:
-int(1)
-array(1) {
-  [0]=>
-  bool(false)
-}
 
+Warning: mb_ereg(): empty pattern in %s on line %d
+bool(false)
+NULL
index 87b354b49e9336cc57fa1449b157c0278581934c..da5e599da00378df2bfe4997e9c5df4926d9c959 100644 (file)
@@ -29,55 +29,43 @@ foreach($inputs as $input) {
 };
 ?>
 --EXPECTF--
--- Iteration 1 --
 
-Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d
+-- Iteration 1 --
 
 Warning: mb_strtolower(): Unknown encoding "12345" in %s on line %d
 bool(false)
 
-Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d
-
 Warning: mb_strtoupper(): Unknown encoding "12345" in %s on line %d
 bool(false)
 
 -- Iteration 2 --
 
-Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d
-
 Warning: mb_strtolower(): Unknown encoding "1.23456789E-9" in %s on line %d
 bool(false)
 
-Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d
-
 Warning: mb_strtoupper(): Unknown encoding "1.23456789E-9" in %s on line %d
 bool(false)
 
 -- Iteration 3 --
 
-Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d
-
 Warning: mb_strtolower(): Unknown encoding "1" in %s on line %d
 bool(false)
 
-Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d
-
 Warning: mb_strtoupper(): Unknown encoding "1" in %s on line %d
 bool(false)
 
 -- Iteration 4 --
 
-Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d
-string(12) "hello, world"
+Warning: mb_strtolower(): Unknown encoding "" in %s on line %d
+bool(false)
 
-Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d
-string(12) "HELLO, WORLD"
+Warning: mb_strtoupper(): Unknown encoding "" in %s on line %d
+bool(false)
 
 -- Iteration 5 --
 
-Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d
-string(12) "hello, world"
-
-Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d
-string(12) "HELLO, WORLD"
+Warning: mb_strtolower(): Unknown encoding "" in %s on line %d
+bool(false)
 
+Warning: mb_strtoupper(): Unknown encoding "" in %s on line %d
+bool(false)
diff --git a/ext/mbstring/tests/mb_strstr.phpt b/ext/mbstring/tests/mb_strstr.phpt
new file mode 100644 (file)
index 0000000..fcf9e85
--- /dev/null
@@ -0,0 +1,35 @@
+--TEST--
+mb_strstr() 
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--INI--
+mbstring.internal_encoding=UTF-8
+--FILE--
+<?php
+function EUC_JP($utf8str) { // test helper: re-encode a UTF-8 string as EUC-JP
+       return mb_convert_encoding($utf8str, "EUC-JP", "UTF-8");
+}
+
+function FROM_EUC_JP($eucjpstr) { // test helper: re-encode an EUC-JP string as UTF-8
+       return mb_convert_encoding($eucjpstr, "UTF-8", "EUC-JP");
+}
+
+var_dump(mb_strstr("あいうえおかきくけこ", "おかき"));
+var_dump(mb_strstr("あいうえおかきくけこ", "おかき", false));
+var_dump(mb_strstr("あいうえおかきくけこ", "おかき", true));
+var_dump(FROM_EUC_JP(mb_strstr(EUC_JP("あいうえおかきくけこ"), EUC_JP("おかき"), false, "EUC-JP")));
+var_dump(FROM_EUC_JP(mb_strstr(EUC_JP("あいうえおかきくけこ"), EUC_JP("おかき"), true, "EUC-JP")));
+mb_internal_encoding("EUC-JP");
+var_dump(FROM_EUC_JP(mb_strstr(EUC_JP("あいうえおかきくけこ"), EUC_JP("おかき"))));
+var_dump(FROM_EUC_JP(mb_strstr(EUC_JP("あいうえおかきくけこ"), EUC_JP("おかき"), false)));
+var_dump(FROM_EUC_JP(mb_strstr(EUC_JP("あいうえおかきくけこ"), EUC_JP("おかき"), true)));
+?>
+--EXPECT--
+string(18) "おかきくけこ"
+string(18) "おかきくけこ"
+string(12) "あいうえ"
+string(18) "おかきくけこ"
+string(12) "あいうえ"
+string(18) "おかきくけこ"
+string(18) "おかきくけこ"
+string(12) "あいうえ"