From a0f3cf5cc4a42850b940da33a0b912268286bfd3 Mon Sep 17 00:00:00 2001
From: Jani Taskinen
Date: Mon, 20 Apr 2009 17:06:03 +0000
Subject: [PATCH] MFB: Thanks to the "maintainers" who are too lazy to commit
FIRST to HEAD!
---
ext/mbstring/config.m4 | 13 +-
ext/mbstring/config.w32 | 14 +-
ext/mbstring/libmbfl/AUTHORS | 15 +-
ext/mbstring/libmbfl/Makefile.am | 6 +-
ext/mbstring/libmbfl/configure.in | 15 +-
ext/mbstring/libmbfl/filters/Makefile.am | 134 +-
ext/mbstring/libmbfl/filters/Makefile.bcc32 | 52 +-
.../libmbfl/filters/mbfilter_cp1254.c | 157 +++
.../libmbfl/filters/mbfilter_cp1254.h | 43 +
ext/mbstring/libmbfl/filters/mbfilter_cp850.c | 147 +++
ext/mbstring/libmbfl/filters/mbfilter_cp850.h | 37 +
.../libmbfl/filters/mbfilter_euc_jp_win.c | 11 +
.../libmbfl/filters/mbfilter_iso2022_jp_ms.c | 522 ++++++++
.../libmbfl/filters/mbfilter_iso2022_jp_ms.h | 44 +
ext/mbstring/libmbfl/filters/mbfilter_jis.c | 154 +--
ext/mbstring/libmbfl/filters/mbfilter_jis.h | 4 -
ext/mbstring/libmbfl/filters/mbfilter_koi8u.c | 146 +++
ext/mbstring/libmbfl/filters/mbfilter_koi8u.h | 47 +
.../libmbfl/filters/unicode_table_cp1254.h | 51 +
.../libmbfl/filters/unicode_table_cp850.h | 52 +
.../libmbfl/filters/unicode_table_jis.h | 14 +-
.../libmbfl/filters/unicode_table_koi8u.h | 166 +++
ext/mbstring/libmbfl/libmbfl.dsp | 12 +
ext/mbstring/libmbfl/libmbfl.sln | 26 +-
ext/mbstring/libmbfl/libmbfl.vcproj | 777 ++++++++----
ext/mbstring/libmbfl/mbfl.rc | 8 +-
ext/mbstring/libmbfl/mbfl/Makefile.am | 29 +-
ext/mbstring/libmbfl/mbfl/Makefile.bcc32 | 13 +-
ext/mbstring/libmbfl/mbfl/mbfilter.h | 7 +
ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h | 2 +-
ext/mbstring/libmbfl/mbfl/mbfilter_pass.h | 4 +-
ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h | 2 +-
ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 7 +
ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 25 +-
ext/mbstring/libmbfl/mbfl/mbfl_encoding.h | 2 +
ext/mbstring/libmbfl/mbfl/mbfl_ident.c | 7 +
ext/mbstring/libmbfl/mbfl/mbfl_language.c | 2 +
ext/mbstring/libmbfl/mbfl/mbfl_language.h | 1 +
ext/mbstring/libmbfl/nls/Makefile.am | 23 +-
ext/mbstring/libmbfl/nls/Makefile.bcc32 | 12 +-
ext/mbstring/libmbfl/nls/nls_ua.c | 22 +
ext/mbstring/libmbfl/nls/nls_ua.h | 9 +
ext/mbstring/libmbfl/tests/Makefile.am | 10 +
ext/mbstring/libmbfl/tests/conv_encoding.c | 104 ++
.../tests/conv_encoding.tests/Makefile.am | 1 +
.../cp51932_cp50220raw.exp | 33 +
.../tests/conv_encoding.tests/ujis_sjis.exp | 35 +
.../tests/conv_encoding.tests/utf8_sjis.exp | 35 +
ext/mbstring/libmbfl/tests/conv_kana.c | 147 +++
ext/mbstring/libmbfl/tests/strcut.c | 113 ++
.../libmbfl/tests/strcut.tests/Makefile.am | 1 +
.../libmbfl/tests/strcut.tests/iso2022jp.exp | 129 ++
.../libmbfl/tests/strcut.tests/ujis.exp | 91 ++
.../libmbfl/tests/strcut.tests/utf8.exp | 91 ++
.../libmbfl/tests/strwidth.tests/Makefile.am | 1 +
.../tests/strwidth.tests/conv_encoding.c | 104 ++
.../conv_encoding.tests/Makefile.am | 1 +
.../cp51932_cp50220raw.exp | 33 +
.../conv_encoding.tests/ujis_sjis.exp | 35 +
.../conv_encoding.tests/utf8_sjis.exp | 35 +
.../libmbfl/tests/strwidth.tests/conv_kana.c | 147 +++
.../conv_kana.tests/Makefile.am | 1 +
.../conv_kana.tests/conv_kana.exp | 1098 +++++++++++++++++
.../libmbfl/tests/strwidth.tests/strwidth.exp | 47 +
ext/mbstring/mbstring.c | 199 ++-
ext/mbstring/mbstring.h | 10 -
ext/mbstring/oniguruma/COPYING | 4 +-
ext/mbstring/oniguruma/HISTORY | 487 +++++++-
ext/mbstring/oniguruma/README | 64 +-
ext/mbstring/oniguruma/README.ja | 67 +-
ext/mbstring/oniguruma/config.h.in | 125 +-
ext/mbstring/oniguruma/enc/big5.c | 2 +-
ext/mbstring/oniguruma/enc/euc_jp.c | 20 +-
ext/mbstring/oniguruma/enc/euc_kr.c | 2 +-
ext/mbstring/oniguruma/enc/euc_tw.c | 2 +-
ext/mbstring/oniguruma/enc/iso8859_1.c | 45 +-
ext/mbstring/oniguruma/enc/iso8859_10.c | 51 +-
ext/mbstring/oniguruma/enc/iso8859_11.c | 14 +-
ext/mbstring/oniguruma/enc/iso8859_13.c | 191 ++-
ext/mbstring/oniguruma/enc/iso8859_14.c | 245 ++--
ext/mbstring/oniguruma/enc/iso8859_15.c | 70 +-
ext/mbstring/oniguruma/enc/iso8859_16.c | 233 ++--
ext/mbstring/oniguruma/enc/iso8859_2.c | 51 +-
ext/mbstring/oniguruma/enc/iso8859_3.c | 51 +-
ext/mbstring/oniguruma/enc/iso8859_4.c | 51 +-
ext/mbstring/oniguruma/enc/iso8859_5.c | 20 +-
ext/mbstring/oniguruma/enc/iso8859_6.c | 14 +-
ext/mbstring/oniguruma/enc/iso8859_7.c | 20 +-
ext/mbstring/oniguruma/enc/iso8859_8.c | 14 +-
ext/mbstring/oniguruma/enc/iso8859_9.c | 51 +-
ext/mbstring/oniguruma/enc/koi8.c | 28 +-
ext/mbstring/oniguruma/enc/koi8_r.c | 25 +-
ext/mbstring/oniguruma/enc/mktable.c | 16 +-
ext/mbstring/oniguruma/enc/sjis.c | 21 +-
ext/mbstring/oniguruma/enc/unicode.c | 37 +-
ext/mbstring/oniguruma/enc/utf16_be.c | 39 +-
ext/mbstring/oniguruma/enc/utf16_le.c | 36 +-
ext/mbstring/oniguruma/enc/utf32_be.c | 41 +-
ext/mbstring/oniguruma/enc/utf32_le.c | 41 +-
ext/mbstring/oniguruma/enc/utf8.c | 126 +-
ext/mbstring/oniguruma/index.html | 181 +--
ext/mbstring/oniguruma/onigcmpt200.h | 6 +
ext/mbstring/oniguruma/oniggnu.h | 8 +-
ext/mbstring/oniguruma/oniguruma.h | 230 ++--
ext/mbstring/oniguruma/regcomp.c | 1082 +++++++++++-----
ext/mbstring/oniguruma/regenc.c | 72 +-
ext/mbstring/oniguruma/regenc.h | 26 +-
ext/mbstring/oniguruma/regerror.c | 95 +-
ext/mbstring/oniguruma/regexec.c | 727 ++++++++---
ext/mbstring/oniguruma/regext.c | 4 +-
ext/mbstring/oniguruma/reggnu.c | 12 +-
ext/mbstring/oniguruma/regint.h | 129 +-
ext/mbstring/oniguruma/regparse.c | 690 +++++++----
ext/mbstring/oniguruma/regparse.h | 48 +-
ext/mbstring/oniguruma/regposix.c | 11 +-
ext/mbstring/oniguruma/regsyntax.c | 33 +-
ext/mbstring/oniguruma/regversion.c | 4 +-
ext/mbstring/oniguruma/st.c | 140 +--
ext/mbstring/oniguruma/st.h | 16 +-
ext/mbstring/php_mbregex.c | 7 +
ext/mbstring/php_unicode.c | 6 +-
ext/mbstring/tests/bug43994.phpt | 104 +-
ext/mbstring/tests/bug43998.phpt | 30 +-
ext/mbstring/tests/mb_strstr.phpt | 35 +
124 files changed, 8427 insertions(+), 2908 deletions(-)
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp1254.c
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp1254.h
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp850.c
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp850.h
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_koi8u.c
create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_koi8u.h
create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_cp1254.h
create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_cp850.h
create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_koi8u.h
create mode 100644 ext/mbstring/libmbfl/nls/nls_ua.c
create mode 100644 ext/mbstring/libmbfl/nls/nls_ua.h
create mode 100644 ext/mbstring/libmbfl/tests/Makefile.am
create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.c
create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am
create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp
create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp
create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp
create mode 100644 ext/mbstring/libmbfl/tests/conv_kana.c
create mode 100644 ext/mbstring/libmbfl/tests/strcut.c
create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am
create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp
create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp
create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp
create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp
create mode 100644 ext/mbstring/tests/mb_strstr.phpt
diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4
index e61ba3c1b2..a5e7920cdf 100644
--- a/ext/mbstring/config.m4
+++ b/ext/mbstring/config.m4
@@ -182,9 +182,9 @@ int main() { return foo(10, "", 3.14); }
PHP_EVAL_LIBLINE([$MBSTRING_SHARED_LIBADD], LDFLAGS)
AC_MSG_CHECKING([if oniguruma has an invalid entry for KOI8 encoding])
AC_TRY_LINK([
- #include
+#include
], [
- return (int)(ONIG_ENCODING_KOI8 + 1);
+return (int)(ONIG_ENCODING_KOI8 + 1);
], [
AC_MSG_RESULT([no])
], [
@@ -228,6 +228,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_byte4.c
libmbfl/filters/mbfilter_cp1251.c
libmbfl/filters/mbfilter_cp1252.c
+ libmbfl/filters/mbfilter_cp1254.c
libmbfl/filters/mbfilter_cp866.c
libmbfl/filters/mbfilter_cp932.c
libmbfl/filters/mbfilter_cp936.c
@@ -255,6 +256,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_iso8859_8.c
libmbfl/filters/mbfilter_iso8859_9.c
libmbfl/filters/mbfilter_jis.c
+ libmbfl/filters/mbfilter_iso2022_jp_ms.c
libmbfl/filters/mbfilter_koi8r.c
libmbfl/filters/mbfilter_armscii8.c
libmbfl/filters/mbfilter_qprint.c
@@ -268,6 +270,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/filters/mbfilter_utf7imap.c
libmbfl/filters/mbfilter_utf8.c
libmbfl/filters/mbfilter_uuencode.c
+ libmbfl/filters/mbfilter_koi8u.c
libmbfl/filters/mbfilter_cp850.c
libmbfl/mbfl/mbfilter.c
libmbfl/mbfl/mbfilter_8bit.c
@@ -291,9 +294,9 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [
libmbfl/nls/nls_zh.c
libmbfl/nls/nls_hy.c
libmbfl/nls/nls_tr.c
+ libmbfl/nls/nls_ua.c
])
PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H])
-
PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_allocators.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h])
else
dnl
@@ -337,8 +340,8 @@ PHP_ARG_ENABLE([mbregex_backtrack], [whether to check multibyte regex backtrack]
MBSTRING: Disable multibyte regex backtrack check], yes, no)
PHP_ARG_WITH(libmbfl, [for external libmbfl],
-[ --with-libmbfl[=DIR] MBSTRING: Use external libmbfl. DIR is the libmbfl install prefix.
- If DIR is not set, the bundled libmbfl will be used], no, no)
+[ --with-libmbfl[=DIR] MBSTRING: Use external libmbfl. DIR is the libmbfl base
+ install directory [BUNDLED]], no, no)
PHP_ARG_WITH(onig, [for external oniguruma],
[ --with-onig[=DIR] MBSTRING: Use external oniguruma. DIR is the oniguruma install prefix.
diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32
index 87f68dbd3b..f452c1868a 100644
--- a/ext/mbstring/config.w32
+++ b/ext/mbstring/config.w32
@@ -3,6 +3,7 @@
ARG_ENABLE("mbstring", "multibyte string functions", "no");
ARG_ENABLE("mbregex", "multibyte regex support", "no");
+ARG_ENABLE("mbregex-backtrack", "check multibyte regex backtrack", "yes");
if (PHP_MBSTRING == "yes") {
@@ -11,7 +12,7 @@ if (PHP_MBSTRING == "yes") {
FSO.CopyFile("ext\\mbstring\\oniguruma\\win32\\config.h",
"ext\\mbstring\\oniguruma\\config.h", true);
- EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", null,
+ EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", true,
"-Iext/mbstring/libmbfl -Iext/mbstring/libmbfl/mbfl \
-Iext/mbstring/oniguruma /D NOT_RUBY=1 /D LIBMBFL_EXPORTS=1 \
/D HAVE_STDARG_PROTOTYPES=1 /D HAVE_CONFIG_H /D HAVE_STDLIB_H \
@@ -28,10 +29,12 @@ if (PHP_MBSTRING == "yes") {
mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \
mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \
mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \
+ mbfilter_iso2022_jp_ms.c \
mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \
mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \
mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \
- mbfilter_uuencode.c mbfilter_armscii8.c", "mbstring");
+ mbfilter_koi8u.c mbfilter_cp1254.c \
+ mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c ", "mbstring");
ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \
mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \
@@ -40,7 +43,7 @@ if (PHP_MBSTRING == "yes") {
ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \
nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \
- nls_tr.c", "mbstring");
+ nls_ua.c nls_tr.c", "mbstring");
AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support');
AC_DEFINE('HAVE_MBSTR_CN', 1, 'CN');
@@ -53,6 +56,9 @@ if (PHP_MBSTRING == "yes") {
AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h');
AC_DEFINE('HAVE_MBREGEX', 1);
AC_DEFINE('HAVE_ONIG', 1);
+ if (PHP_MBREGEX_BACKTRACK != "no") {
+ AC_DEFINE('USE_COMBINATION_EXPLOSION_CHECK', 1);
+ }
ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \
regenc.c regexec.c reggnu.c regparse.c regposerr.c \
regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring");
@@ -62,7 +68,7 @@ if (PHP_MBSTRING == "yes") {
iso8859_7.c iso8859_8.c iso8859_9.c iso8859_10.c \
iso8859_11.c iso8859_13.c iso8859_14.c iso8859_15.c iso8859_16.c \
koi8.c koi8_r.c sjis.c utf8.c unicode.c utf16_be.c utf16_le.c \
- utf32_be.c utf32_le.c", "mbstring");
+ utf32_be.c utf32_le.c gb18030.c", "mbstring");
ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring");
}
}
diff --git a/ext/mbstring/libmbfl/AUTHORS b/ext/mbstring/libmbfl/AUTHORS
index e606231518..9a9f2f9fa3 100644
--- a/ext/mbstring/libmbfl/AUTHORS
+++ b/ext/mbstring/libmbfl/AUTHORS
@@ -1,10 +1,13 @@
-Den V. Tsopa
-Hironori Sato
Marcus Boerger
-Moriyoshi Koizumi
+Hayk Chamyan
+Wez Furlong
Rui Hirokawa
Shigeru Kanemoto
-Tsukada Takuya
-Tateyama
U. Kenkichi
-Wez Furlong
+Moriyoshi Koizumi
+Hironori Sato
+Tsukada Takuya
+Tateyama
+Den V. Tsopa
+Maksym Veremeyenko
+Haluk AKIN
diff --git a/ext/mbstring/libmbfl/Makefile.am b/ext/mbstring/libmbfl/Makefile.am
index 7f60683435..070a7fcc62 100644
--- a/ext/mbstring/libmbfl/Makefile.am
+++ b/ext/mbstring/libmbfl/Makefile.am
@@ -1,5 +1,9 @@
+AUTOMAKE_OPTIONS=dejagnu
+DEJATOOL=conv_encoding conv_kana strwidth strcut
+RUNTESTDEFAULTFLAGS=--tool $$tool --srcdir "$$srcdir"/tests
+LANG=C
EXTRA_DIST=AUTHORS DISCLAIMER LICENSE Makefile.bcc32 \
config.h.bcc32 config.h.vc6 \
libmbfl.dsp libmbfl.dsw libmbfl.sln libmbfl.vcproj mbfl.rc \
mksbcc32.bat rules.mak.bcc32
-SUBDIRS = nls filters mbfl
+SUBDIRS = nls filters mbfl tests
diff --git a/ext/mbstring/libmbfl/configure.in b/ext/mbstring/libmbfl/configure.in
index 25d2e59373..47e1026c7d 100644
--- a/ext/mbstring/libmbfl/configure.in
+++ b/ext/mbstring/libmbfl/configure.in
@@ -1,10 +1,10 @@
# Process this file with autoconf to produce a configure script.
AC_INIT(mbfl/mbfilter.c)
-AM_INIT_AUTOMAKE(libmbfl, 1.0.0)
+AM_INIT_AUTOMAKE(libmbfl, 1.0.2)
AC_CONFIG_SRCDIR(mbfl/mbfilter.c)
AM_CONFIG_HEADER(config.h)
-SHLIB_VERSION="1:0:0"
+SHLIB_VERSION="1:0:2"
AC_SUBST(SHLIB_VERSION)
# Checks for programs.
@@ -34,5 +34,14 @@ if test "$FETCH_VIA_FTP" = "curl"; then
FETCH_VIA_FTP="curl -O"
fi
-AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile])
+AC_CONFIG_FILES([
+ Makefile
+ mbfl/Makefile
+ filters/Makefile
+ nls/Makefile
+ tests/Makefile
+ tests/conv_encoding.tests/Makefile
+ tests/conv_kana.tests/Makefile
+ tests/strwidth.tests/Makefile
+ tests/strcut.tests/Makefile])
AC_OUTPUT
diff --git a/ext/mbstring/libmbfl/filters/Makefile.am b/ext/mbstring/libmbfl/filters/Makefile.am
index 9b2fda4c39..802af4e61d 100644
--- a/ext/mbstring/libmbfl/filters/Makefile.am
+++ b/ext/mbstring/libmbfl/filters/Makefile.am
@@ -2,7 +2,139 @@ EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_cp51932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp51932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h 
unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h
+libmbfl_filters_la_SOURCES=mbfilter_cp936.c \
+ mbfilter_hz.c \
+ mbfilter_euc_tw.c \
+ mbfilter_big5.c \
+ mbfilter_euc_jp.c \
+ mbfilter_jis.c \
+ mbfilter_iso8859_1.c \
+ mbfilter_iso8859_2.c \
+ mbfilter_cp1254.c \
+ mbfilter_cp1252.c \
+ mbfilter_cp1251.c \
+ mbfilter_ascii.c \
+ mbfilter_iso8859_3.c \
+ mbfilter_iso8859_4.c \
+ mbfilter_iso8859_5.c \
+ mbfilter_iso8859_6.c \
+ mbfilter_iso8859_7.c \
+ mbfilter_iso8859_8.c \
+ mbfilter_iso8859_9.c \
+ mbfilter_iso8859_10.c \
+ mbfilter_iso8859_13.c \
+ mbfilter_iso8859_14.c \
+ mbfilter_iso8859_15.c \
+ mbfilter_iso8859_16.c \
+ mbfilter_htmlent.c \
+ mbfilter_byte2.c \
+ mbfilter_byte4.c \
+ mbfilter_uuencode.c \
+ mbfilter_base64.c \
+ mbfilter_sjis.c \
+ mbfilter_7bit.c \
+ mbfilter_qprint.c \
+ mbfilter_ucs4.c \
+ mbfilter_ucs2.c \
+ mbfilter_utf32.c \
+ mbfilter_utf16.c \
+ mbfilter_utf8.c \
+ mbfilter_utf7.c \
+ mbfilter_utf7imap.c \
+ mbfilter_euc_jp_win.c \
+ mbfilter_cp932.c \
+ mbfilter_cp51932.c \
+ mbfilter_euc_cn.c \
+ mbfilter_euc_kr.c \
+ mbfilter_uhc.c \
+ mbfilter_iso2022_kr.c \
+ mbfilter_cp866.c \
+ mbfilter_koi8r.c \
+ mbfilter_koi8u.c \
+ mbfilter_armscii8.c \
+ mbfilter_cp850.c \
+ html_entities.c \
+ cp932_table.h \
+ html_entities.h \
+ mbfilter_7bit.h \
+ mbfilter_ascii.h \
+ mbfilter_base64.h \
+ mbfilter_big5.h \
+ mbfilter_byte2.h \
+ mbfilter_byte4.h \
+ mbfilter_cp1251.h \
+ mbfilter_cp1252.h \
+ mbfilter_cp1254.h \
+ mbfilter_cp866.h \
+ mbfilter_cp932.h \
+ mbfilter_cp936.h \
+ mbfilter_euc_cn.h \
+ mbfilter_euc_jp.h \
+ mbfilter_euc_jp_win.h \
+ mbfilter_euc_kr.h \
+ mbfilter_euc_tw.h \
+ mbfilter_htmlent.h \
+ mbfilter_hz.h \
+ mbfilter_iso2022_kr.h \
+ mbfilter_iso8859_1.h \
+ mbfilter_iso8859_10.h \
+ mbfilter_iso8859_13.h \
+ mbfilter_iso8859_14.h \
+ mbfilter_iso8859_15.h \
+ mbfilter_iso8859_16.h \
+ mbfilter_iso8859_2.h \
+ mbfilter_iso8859_3.h \
+ mbfilter_iso8859_4.h \
+ mbfilter_iso8859_5.h \
+ mbfilter_iso8859_6.h \
+ mbfilter_iso8859_7.h \
+ mbfilter_iso8859_8.h \
+ mbfilter_iso8859_9.h \
+ mbfilter_jis.h \
+ mbfilter_koi8r.h \
+ mbfilter_koi8u.h \
+ mbfilter_armscii8.h \
+ mbfilter_qprint.h \
+ mbfilter_sjis.h \
+ mbfilter_ucs2.h \
+ mbfilter_ucs4.h \
+ mbfilter_uhc.h \
+ mbfilter_utf16.h \
+ mbfilter_utf32.h \
+ mbfilter_utf7.h \
+ mbfilter_utf7imap.h \
+ mbfilter_utf8.h \
+ mbfilter_uuencode.h \
+ mbfilter_cp51932.h \
+ mbfilter_cp850.h \
+ unicode_prop.h \
+ unicode_table_big5.h \
+ unicode_table_cns11643.h \
+ unicode_table_cp1251.h \
+ unicode_table_cp1252.h \
+ unicode_table_cp1254.h \
+ unicode_table_cp866.h \
+ unicode_table_cp932_ext.h \
+ unicode_table_cp936.h \
+ unicode_table_iso8859_10.h \
+ unicode_table_iso8859_13.h \
+ unicode_table_iso8859_14.h \
+ unicode_table_iso8859_15.h \
+ unicode_table_iso8859_16.h \
+ unicode_table_iso8859_2.h \
+ unicode_table_iso8859_3.h \
+ unicode_table_iso8859_4.h \
+ unicode_table_iso8859_5.h \
+ unicode_table_iso8859_6.h \
+ unicode_table_iso8859_7.h \
+ unicode_table_iso8859_8.h \
+ unicode_table_iso8859_9.h \
+ unicode_table_jis.h \
+ unicode_table_koi8r.h \
+ unicode_table_koi8u.h \
+ unicode_table_armscii8.h \
+ unicode_table_cp850.h \
+ unicode_table_uhc.h
mbfilter_iso8859_2.c: unicode_table_iso8859_2.h
diff --git a/ext/mbstring/libmbfl/filters/Makefile.bcc32 b/ext/mbstring/libmbfl/filters/Makefile.bcc32
index 03e1d88024..841c09632a 100644
--- a/ext/mbstring/libmbfl/filters/Makefile.bcc32
+++ b/ext/mbstring/libmbfl/filters/Makefile.bcc32
@@ -1,6 +1,56 @@
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I../mbfl
-OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj mbfilter_armscii8.obj
+OBJS=mbfilter_cp936.obj \
+ mbfilter_hz.obj \
+ mbfilter_euc_tw.obj \
+ mbfilter_big5.obj \
+ mbfilter_euc_jp.obj \
+ mbfilter_jis.obj \
+ mbfilter_iso8859_1.obj \
+ mbfilter_iso8859_2.obj \
+ mbfilter_cp1252.obj \
+ mbfilter_cp1251.obj \
+ mbfilter_cp1254.obj \
+ mbfilter_ascii.obj \
+ mbfilter_iso8859_3.obj \
+ mbfilter_iso8859_4.obj \
+ mbfilter_iso8859_5.obj \
+ mbfilter_iso8859_6.obj \
+ mbfilter_iso8859_7.obj \
+ mbfilter_iso8859_8.obj \
+ mbfilter_iso8859_9.obj \
+ mbfilter_iso8859_10.obj \
+ mbfilter_iso8859_13.obj \
+ mbfilter_iso8859_14.obj \
+ mbfilter_iso8859_15.obj \
+ mbfilter_iso8859_16.obj \
+ mbfilter_htmlent.obj \
+ mbfilter_byte2.obj \
+ mbfilter_byte4.obj \
+ mbfilter_uuencode.obj \
+ mbfilter_base64.obj \
+ mbfilter_sjis.obj \
+ mbfilter_7bit.obj \
+ mbfilter_qprint.obj \
+ mbfilter_ucs4.obj \
+ mbfilter_ucs2.obj \
+ mbfilter_utf32.obj \
+ mbfilter_utf16.obj \
+ mbfilter_utf8.obj \
+ mbfilter_utf7.obj \
+ mbfilter_utf7imap.obj \
+ mbfilter_euc_jp_win.obj \
+ mbfilter_cp932.obj \
+ mbfilter_euc_cn.obj \
+ mbfilter_euc_kr.obj \
+ mbfilter_uhc.obj \
+ mbfilter_iso2022_kr.obj \
+ mbfilter_cp866.obj \
+ mbfilter_koi8r.obj \
+ mbfilter_koi8u.obj \
+ html_entities.obj \
+ mbfilter_armscii8.obj \
+ mbfilter_cp850.obj
all: $(OBJS)
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c
new file mode 100644
index 0000000000..7e933425f4
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c
@@ -0,0 +1,157 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Haluk AKIN
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter_ru.c
+ * by moriyoshi koizumi on 4 dec 2002.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_cp1254.h"
+#include "unicode_table_cp1254.h"
+
+static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter);
+
+static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL};
+
+const mbfl_encoding mbfl_encoding_cp1254 = {
+ mbfl_no_encoding_cp1254,
+ "Windows-1254",
+ "Windows-1254",
+ (const char *(*)[])&mbfl_encoding_cp1254_aliases,
+ NULL,
+ MBFL_ENCTYPE_SBCS
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_cp1254 = {
+ mbfl_no_encoding_cp1254,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_cp1254
+};
+
+const struct mbfl_convert_vtbl vtbl_cp1254_wchar = {
+ mbfl_no_encoding_cp1254,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_cp1254_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_cp1254,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_cp1254,
+ mbfl_filt_conv_common_flush
+};
+
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
+
+/* wchar => cp1254: values below 0x80 are used as-is, others are searched for
+ * in cp1254_ucs_table; without a match the input goes to the illegal-output
+ * handler unless illegal_mode is NONE. Returns c, or -1 when CK() fails. */
+int
+mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter)
+{
+ int s, n;
+
+ if (c < 0x80) { /* below 0x80: no table lookup, the value itself is the candidate */
+ s = c;
+ } else {
+ s = -1; /* no mapping found yet */
+ n = cp1254_ucs_table_len-1; /* scan the table from its last entry down to index 0 */
+ while (n >= 0) {
+ if (c == cp1254_ucs_table[n] && c != 0xfffe) { /* 0xfffe is never accepted as a match */
+ s = cp1254_ucs_table_min + n; /* table index -> output value */
+ break;
+ }
+ n--;
+ }
+ if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) { /* input tagged with MBFL_WCSPLANE_CP1254 */
+ s = c & MBFL_WCSPLANE_MASK; /* keep only the bits under the plane mask */
+ }
+ }
+
+ if (s >= 0) {
+ CK((*filter->output_function)(s, filter->data));
+ } else {
+ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { /* mode NONE: the input is dropped silently */
+ CK(mbfl_filt_conv_illegal_output(c, filter));
+ }
+ }
+
+ return c;
+}
+
+/* cp1254 => wchar: bytes below cp1254_ucs_table_min are used as-is, bytes up
+ * to 0xff are looked up in cp1254_ucs_table, and every other value is tagged
+ * with MBFL_WCSGROUP_THROUGH. Returns c, or -1 when CK() fails. */
+int
+mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter)
+{
+ int s;
+
+ if (c >= 0 && c < cp1254_ucs_table_min) { /* below the table's first entry: passed through unchanged */
+ s = c;
+ } else if (c >= cp1254_ucs_table_min && c < 0x100) {
+ s = cp1254_ucs_table[c - cp1254_ucs_table_min]; /* table is indexed relative to cp1254_ucs_table_min */
+ if (s <= 0) { /* table entry <= 0: fall back to tagging the raw byte */
+ s = c;
+ s &= MBFL_WCSPLANE_MASK;
+ s |= MBFL_WCSPLANE_CP1254;
+ }
+ } else { /* values not covered by the two ranges above */
+ s = c;
+ s &= MBFL_WCSGROUP_MASK;
+ s |= MBFL_WCSGROUP_THROUGH;
+ }
+
+ CK((*filter->output_function)(s, filter->data));
+
+ return c;
+}
+
+/* Bytes 0x80-0xfe clear filter->flag; any other value sets it ("not it").
+ * Actually, this is pretty much a NO-OP, since the identification
+ * system doesn't allow us to discriminate between a positive match,
+ * a possible match and a definite non-match.
+ * The problem here is that cp1254 looks like SJIS for certain chars.
+ * */
+static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter)
+{
+ if (c >= 0x80 && c < 0xff) /* note: 0xff itself falls into the else branch */
+ filter->flag = 0;
+ else
+ filter->flag = 1; /* not it */
+ return c;
+}
+
+
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h
new file mode 100644
index 0000000000..3200d520eb
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h
@@ -0,0 +1,43 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Haluk AKIN
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter.c
+ * by moriyoshi koizumi on 4 dec 2002.
+ *
+ */
+
+#ifndef MBFL_MBFILTER_CP1254_H
+#define MBFL_MBFILTER_CP1254_H
+
+#include "mbfilter.h"
+
+extern const mbfl_encoding mbfl_encoding_cp1254;
+extern const struct mbfl_identify_vtbl vtbl_identify_cp1254;
+extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar;
+extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254;
+
+int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_CP1254_H */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.c b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c
new file mode 100644
index 0000000000..5388c048b1
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c
@@ -0,0 +1,147 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Den V. Tsopa
+ * Adaption for CP850: D. Giffeler
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_cp850.h"
+#include "unicode_table_cp850.h"
+
+static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter);
+/* NULL-terminated list of alternative spellings of the encoding name. */
+static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM-850", NULL};
+/* Descriptor for CP850. NOTE(review): member roles noted below are inferred from the values -- confirm against mbfl_encoding. */
+const mbfl_encoding mbfl_encoding_cp850 = {
+ mbfl_no_encoding_cp850, /* encoding id, also used by the three vtbls below */
+ "CP850", /* presumably the encoding name */
+ "CP850", /* presumably the MIME name */
+ (const char *(*)[])&mbfl_encoding_cp850_aliases,
+ NULL,
+ MBFL_ENCTYPE_SBCS
+};
+/* Identification filter table: shared ctor/dtor plus the CP850-specific check. */
+const struct mbfl_identify_vtbl vtbl_identify_cp850 = {
+ mbfl_no_encoding_cp850,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_cp850
+};
+/* Conversion table for wchar => cp850 (first id: wchar, second id: cp850). */
+const struct mbfl_convert_vtbl vtbl_wchar_cp850 = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_cp850,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_cp850,
+ mbfl_filt_conv_common_flush
+};
+/* Conversion table for cp850 => wchar (first id: cp850, second id: wchar). */
+const struct mbfl_convert_vtbl vtbl_cp850_wchar = {
+ mbfl_no_encoding_cp850,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_cp850_wchar,
+ mbfl_filt_conv_common_flush
+};
+/* Evaluates statement once; a negative result makes the enclosing function return -1. */
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * cp850 => wchar
+ *
+ * Emits one value through filter->output_function: c itself when it lies in
+ * 0 .. cp850_ucs_table_min-1, the cp850_ucs_table entry for larger values
+ * below 0x100 (or c tagged MBFL_WCSPLANE_CP850 when that entry is <= 0), and
+ * c tagged MBFL_WCSGROUP_THROUGH otherwise.  Returns c, or -1 via CK.
+ */
+int
+mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter)
+{
+	int w;
+
+	if (c >= 0 && c < cp850_ucs_table_min) {
+		w = c;
+	} else if (c < cp850_ucs_table_min || c >= 0x100) {
+		w = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
+	} else {
+		w = cp850_ucs_table[c - cp850_ucs_table_min];
+		if (w <= 0) {
+			w = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_CP850;
+		}
+	}
+
+	CK((*filter->output_function)(w, filter->data));
+	return c;
+}
+
+/*
+ * wchar => cp850
+ *
+ * Values below 0x80 are taken as-is.  Anything else is looked up in
+ * cp850_ucs_table (last matching slot wins); if that fails and c is tagged
+ * MBFL_WCSPLANE_CP850, its low bits are used.  A negative result is handed
+ * to mbfl_filt_conv_illegal_output() unless illegal_mode is ..._MODE_NONE.
+ */
+int
+mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter)
+{
+	int s = c;
+	int i;
+
+	if (c >= 0x80) {
+		s = -1;
+		/* reverse lookup, scanning from the last table slot downwards */
+		for (i = cp850_ucs_table_len - 1; i >= 0; i--) {
+			if (cp850_ucs_table[i] == c) {
+				s = cp850_ucs_table_min + i;
+				break;
+			}
+		}
+		if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP850) {
+			s = c & MBFL_WCSPLANE_MASK;
+		}
+	}
+
+	if (s >= 0) {
+		CK((*filter->output_function)(s, filter->data));
+	} else if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+		CK(mbfl_filt_conv_illegal_output(c, filter));
+	}
+	return c;
+}
+
+/* Identify filter for CP850: clears filter->flag for values 0x80..0xfe and sets it
+ * ("not it") for every other value, which includes all of ASCII and 0xff; returns c.
+ * NOTE(review): flagging ASCII and 0xff may or may not be intended -- confirm. */
+static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter)
+{
+	filter->flag = (c >= 0x80 && c < 0xff) ? 0 : 1;
+	return c;
+}
+
+
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.h b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h
new file mode 100644
index 0000000000..a1e0f9c0c2
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h
@@ -0,0 +1,37 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Den V. Tsopa
+ * Adaption for CP850: D. Giffeler
+ *
+ */
+
+#ifndef MBFL_MBFILTER_CP850_H
+#define MBFL_MBFILTER_CP850_H
+
+#include "mbfilter.h" /* same include the sibling filter headers use, so this header no longer depends on include order */
+
+extern const mbfl_encoding mbfl_encoding_cp850; /* encoding descriptor */
+extern const struct mbfl_identify_vtbl vtbl_identify_cp850; /* identification filter table */
+extern const struct mbfl_convert_vtbl vtbl_wchar_cp850; /* wchar => cp850 */
+extern const struct mbfl_convert_vtbl vtbl_cp850_wchar; /* cp850 => wchar */
+int mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter); /* cp850 => wchar, one value per call */
+int mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter); /* wchar => cp850, one value per call */
+#endif /* MBFL_MBFILTER_CP850_H */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
index bc07f13438..ece0c7ee41 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c
@@ -204,6 +204,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
s = (c1 - 0xa1)*94 + c - 0xa1;
if (s >= 0 && s < jisx0212_ucs_table_size) {
w = jisx0212_ucs_table[s];
+ if (w == 0x007e) {
+ w = 0xff5e; /* FULLWIDTH TILDE */
+ }
} else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */
s = (c1<< 8) | c;
w = 0;
@@ -222,6 +225,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter)
} else {
w = 0;
}
+ if (w == 0x00A6) {
+ w = 0xFFE4; /* FULLWIDTH BROKEN BAR */
+ }
if (w <= 0) {
w = ((c1 & 0x7f) << 8) | (c & 0x7f);
w &= MBFL_WCSPLANE_MASK;
@@ -274,6 +280,9 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
c2 = s1%94 + 0xa1;
s1 = (c1 << 8) | c2;
}
+ if (s1 == 0xa2f1) {
+ s1 = 0x2d62; /* NUMERO SIGN */
+ }
if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_WINCP932) {
@@ -311,6 +320,8 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
s1 = 0x2172;
} else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
s1 = 0x224c;
+ } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
+ s1 = 0x2141;
} else {
s1 = -1;
c1 = 0;
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
new file mode 100644
index 0000000000..df961677d1
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c
@@ -0,0 +1,522 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter_ja.c
+ * by moriyoshi koizumi on 4 dec 2002.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_iso2022_jp_ms.h"
+
+#include "unicode_table_cp932_ext.h"
+#include "unicode_table_jis.h"
+#include "cp932_table.h"
+
+static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter);
+/* NULL-terminated list of alternative spellings of the encoding name. */
+static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL};
+/* Descriptor for ISO-2022-JP-MS. NOTE(review): member roles noted below are inferred from the values -- confirm against mbfl_encoding. */
+const mbfl_encoding mbfl_encoding_2022jpms = {
+ mbfl_no_encoding_2022jpms, /* encoding id, also used by the three vtbls below */
+ "ISO-2022-JP-MS", /* presumably the encoding name */
+ "ISO-2022-JP", /* presumably the MIME name; note it differs from the string above */
+ (const char *(*)[])&mbfl_encoding_2022jpms_aliases,
+ NULL,
+ MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
+};
+/* Identification filter table: shared ctor/dtor plus the ISO-2022-JP-MS specific check. */
+const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {
+ mbfl_no_encoding_2022jpms,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_2022jpms
+};
+/* Conversion table for ISO-2022-JP-MS => wchar; uses the common flush. */
+const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
+ mbfl_no_encoding_2022jpms,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_2022jpms_wchar,
+ mbfl_filt_conv_common_flush
+};
+/* Conversion table for wchar => ISO-2022-JP-MS; its flush writes the closing ESC ( B when needed. */
+const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_2022jpms,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_2022jpms,
+ mbfl_filt_conv_any_2022jpms_flush
+};
+
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* negative result => enclosing function returns -1 */
+/* Linear index of byte pair (c1, c2), 188 slots per lead byte: c1 is rebased from 0xc1 when above 0x9f, else from 0x81; c2 from 0x41 when above 0x7e, else from 0x40. */
+#define sjistoidx(c1, c2) \
+ (((c1) > 0x9f) \
+ ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \
+ : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)))
+#define idxtojis1(c) (((c) / 94) + 0x21) /* first byte for a linear index: 94 cells per row, rows start at 0x21 */
+#define idxtojis2(c) (((c) % 94) + 0x21) /* second byte for a linear index: cell within the row, starting at 0x21 */
+
+/* ISO-2022-JP-MS => wchar.  Consumes one input value c per call.  filter->status keeps the
+ * selected character set in its high nibble and the position inside a two-byte character or
+ * escape sequence in its low nibble.  Returns c, or -1 when an output call returns < 0. */
+int
+mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter)
+{
+ int c1, s, w;
+
+retry:
+ switch (filter->status & 0xf) {
+/* case 0x00: ASCII */
+/* case 0x10: X 0201 latin (never assigned in this function: ESC ( J selects 0x00 below) */
+/* case 0x20: X 0201 kana */
+/* case 0x80: X 0208 */
+/* case 0xa0: UDC */
+ case 0:
+ if (c == 0x1b) {
+ filter->status += 2; /* low nibble 2: ESC seen */
+ } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */
+ CK((*filter->output_function)(0xff40 + c, filter->data)); /* 0x21..0x5f -> 0xff61..0xff9f */
+ } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */
+ filter->cache = c; /* kept until the second byte arrives */
+ filter->status += 1; /* low nibble 1: second byte expected */
+ } else if (c >= 0 && c < 0x80) { /* latin, CTLs */
+ CK((*filter->output_function)(c, filter->data));
+ } else if (c > 0xa0 && c < 0xe0) { /* GR kana */
+ CK((*filter->output_function)(0xfec0 + c, filter->data)); /* 0xa1..0xdf -> 0xff61..0xff9f */
+ } else {
+ w = c & MBFL_WCSGROUP_MASK; /* anything else is passed on tagged MBFL_WCSGROUP_THROUGH */
+ w |= MBFL_WCSGROUP_THROUGH;
+ CK((*filter->output_function)(w, filter->data));
+ }
+ break;
+
+/* case 0x81: X 0208 second char */
+/* case 0xa1: UDC second char */
+ case 1:
+ w = 0;
+ filter->status &= ~0xf; /* back to low nibble 0 whatever c turns out to be */
+ c1 = filter->cache;
+ if (c > 0x20 && c < 0x7f) {
+ s = (c1 - 0x21)*94 + c - 0x21; /* linear index: 94 cells per row, both bytes based at 0x21 */
+ if (filter->status == 0x80) {
+ if (s <= 137) { /* fixed mappings that take precedence over the table lookups below */
+ if (s == 31) {
+ w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */
+ } else if (s == 32) {
+ w = 0xff5e; /* FULLWIDTH TILDE */
+ } else if (s == 33) {
+ w = 0x2225; /* PARALLEL TO */
+ } else if (s == 60) {
+ w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */
+ } else if (s == 80) {
+ w = 0xffe0; /* FULLWIDTH CENT SIGN */
+ } else if (s == 81) {
+ w = 0xffe1; /* FULLWIDTH POUND SIGN */
+ } else if (s == 137) {
+ w = 0xffe2; /* FULLWIDTH NOT SIGN */
+ }
+ }
+ if (w == 0) { /* no fixed mapping: ext1 range is tried before the jisx0208 table, ext2 last */
+ if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */
+ w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
+ } else if (s >= 0 && s < jisx0208_ucs_table_size) {
+ w = jisx0208_ucs_table[s];
+ } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */
+ w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
+ } else {
+ w = 0;
+ }
+ }
+ if (w <= 0) { /* unmapped: emit the raw byte pair tagged with the JIS0208 plane */
+ w = (c1 << 8) | c;
+ w &= MBFL_WCSPLANE_MASK;
+ w |= MBFL_WCSPLANE_JIS0208;
+ }
+ CK((*filter->output_function)(w, filter->data));
+ } else { /* any other set; only 0xa0 (UDC) besides 0x80 enters case 1 */
+ if (c1 > 0x20 && c1 < 0x35) { /* 20 rows, 0x21..0x34 */
+ w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; /* 0xe000 plus the linear index */
+ }
+ if (w <= 0) { /* row outside that range: first byte rebased to start at 0x7f, tagged JIS0208 plane */
+ w = (((c1 - 0x21) + 0x7f) << 8) | c;
+ w &= MBFL_WCSPLANE_MASK;
+ w |= MBFL_WCSPLANE_JIS0208;
+ }
+ CK((*filter->output_function)(w, filter->data));
+ }
+ } else if (c == 0x1b) { /* ESC in place of the second byte: the cached first byte is not emitted */
+ filter->status += 2;
+ } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */
+ CK((*filter->output_function)(c, filter->data)); /* only c is emitted, not the cached first byte */
+ } else {
+ w = (c1 << 8) | c;
+ w &= MBFL_WCSGROUP_MASK;
+ w |= MBFL_WCSGROUP_THROUGH;
+ CK((*filter->output_function)(w, filter->data));
+ }
+ break;
+
+ /* ESC */
+/* case 0x02: */
+/* case 0x12: */
+/* case 0x22: */
+/* case 0x82: */
+/* case 0xa2: */
+ case 2:
+ if (c == 0x24) { /* '$' */
+ filter->status++; /* low nibble 3: ESC $ */
+ } else if (c == 0x28) { /* '(' */
+ filter->status += 3; /* low nibble 5: ESC ( */
+ } else { /* not a known sequence: emit the swallowed ESC, then reprocess c in the base state */
+ filter->status &= ~0xf;
+ CK((*filter->output_function)(0x1b, filter->data));
+ goto retry;
+ }
+ break;
+
+ /* ESC $ */
+/* case 0x03: */
+/* case 0x13: */
+/* case 0x23: */
+/* case 0x83: */
+/* case 0xa3: */
+ case 3:
+ if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
+ filter->status = 0x80; /* X 0208 selected */
+ } else if (c == 0x28) { /* '(' */
+ filter->status++; /* low nibble 4: ESC $ ( */
+ } else { /* not a known sequence: emit ESC $ literally, then reprocess c */
+ filter->status &= ~0xf;
+ CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x24, filter->data));
+ goto retry;
+ }
+ break;
+
+ /* ESC $ ( */
+/* case 0x04: */
+/* case 0x14: */
+/* case 0x24: */
+/* case 0x84: */
+/* case 0xa4: */
+ case 4:
+ if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
+ filter->status = 0x80; /* X 0208 selected */
+ } else if (c == 0x3f) { /* '?' */
+ filter->status = 0xa0; /* UDC selected */
+ } else { /* not a known sequence: emit ESC $ ( literally, then reprocess c */
+ filter->status &= ~0xf;
+ CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x24, filter->data));
+ CK((*filter->output_function)(0x28, filter->data));
+ goto retry;
+ }
+ break;
+
+ /* ESC ( */
+/* case 0x05: */
+/* case 0x15: */
+/* case 0x25: */
+/* case 0x85: */
+/* case 0xa5: */
+ case 5:
+ if (c == 0x42) { /* 'B' */
+ filter->status = 0;
+ } else if (c == 0x4a) { /* 'J' */
+ filter->status = 0; /* same state as ESC ( B: no separate X 0201 latin state is kept */
+ } else if (c == 0x49) { /* 'I' */
+ filter->status = 0x20; /* X 0201 kana selected */
+ } else { /* not a known sequence: emit ESC ( literally, then reprocess c */
+ filter->status &= ~0xf;
+ CK((*filter->output_function)(0x1b, filter->data));
+ CK((*filter->output_function)(0x28, filter->data));
+ goto retry;
+ }
+ break;
+
+ default: /* low nibble outside 0..5: reset to the initial state; c is not emitted */
+ filter->status = 0;
+ break;
+ }
+
+ return c;
+}
+
+/* Relocates offset c (counted from slot (0xfa, 0x40)) by a range-dependent shift and returns it as (first byte << 8) | second byte. */
+static int cp932ext3_cp932ext2_jis(int c)
+{
+	const int pos = sjistoidx(0xfa, 0x40) + c;
+	int shift = 0;
+	if (pos >= sjistoidx(0xfa, 0x5c)) {
+		shift = sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40);
+	} else if (pos >= sjistoidx(0xfa, 0x55)) {
+		shift = sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa);
+	} else if (pos >= sjistoidx(0xfa, 0x40)) {
+		shift = sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef);
+	}
+	return (idxtojis1(pos - shift) << 8) | idxtojis2(pos - shift);
+}
+
+/* wchar => ISO-2022-JP-MS.  Maps c to a code s1, then writes it, preceded by an escape sequence
+ * whenever the set recorded in filter->status (0: latin, 0x100: kana, 0x200: X 0208, 0x800: UDC)
+ * changes.  Unmapped input goes to mbfl_filt_conv_illegal_output() unless illegal_mode is NONE. */
+int
+mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter)
+{
+ int c1, c2, s1, s2;
+
+ s1 = 0;
+ s2 = 0; /* becomes 1 when s1 is taken from the WINCP932 plane, which exempts it from the X 0212 rejection below */
+ if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
+ s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min];
+ } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) {
+ s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min];
+ } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) {
+ s1 = ucs_i_jis_table[c - ucs_i_jis_table_min];
+ } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
+ s1 = ucs_r_jis_table[c - ucs_r_jis_table_min];
+ } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */
+ s1 = c - 0xe000;
+ c1 = s1/94 + 0x7f; /* first byte 0x7f..0x92, so s1 ends up in 0x7f21..0x927e */
+ c2 = s1%94 + 0x21;
+ s1 = (c1 << 8) | c2;
+ }
+ if (s1 <= 0) { /* no table hit: try tagged planes and a few fixed mappings */
+ c1 = c & ~MBFL_WCSPLANE_MASK;
+ if (c1 == MBFL_WCSPLANE_WINCP932) {
+ s1 = c & MBFL_WCSPLANE_MASK;
+ s2 = 1;
+ } else if (c1 == MBFL_WCSPLANE_JIS0208) {
+ s1 = c & MBFL_WCSPLANE_MASK;
+ } else if (c1 == MBFL_WCSPLANE_JIS0212) {
+ s1 = c & MBFL_WCSPLANE_MASK;
+ s1 |= 0x8080; /* with s2 == 0 such a value is discarded again by the check below once it is >= 0xa1a1 */
+ } else if (c == 0xa5) { /* YEN SIGN */
+ s1 = 0x216f; /* FULLWIDTH YEN SIGN */
+ } else if (c == 0x203e) { /* OVER LINE */
+ s1 = 0x2131; /* FULLWIDTH MACRON */
+ } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */
+ s1 = 0x2140;
+ } else if (c == 0xff5e) { /* FULLWIDTH TILDE */
+ s1 = 0x2141;
+ } else if (c == 0x2225) { /* PARALLEL TO */
+ s1 = 0x2142;
+ } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */
+ s1 = 0x215d;
+ } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */
+ s1 = 0x2171;
+ } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */
+ s1 = 0x2172;
+ } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */
+ s1 = 0x224c;
+ }
+ }
+ if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */
+ s1 = -1;
+ c1 = 0;
+ c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; /* number of ext1 slots to scan */
+ while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
+ if (c == cp932ext1_ucs_table[c1]) {
+ s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); /* slot -> two bytes, rows starting at 0x2d */
+ break;
+ }
+ c1++;
+ }
+ if (s1 <= 0) {
+ c1 = 0;
+ c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; /* number of ext3 slots to scan */
+ while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
+ if (c == cp932ext3_ucs_table[c1]) {
+ s1 = cp932ext3_cp932ext2_jis(c1); /* the helper converts the slot number to a two-byte code */
+ break;
+ }
+ c1++;
+ }
+ }
+ if (c == 0) { /* c == 0 is written as a single 0 byte */
+ s1 = 0;
+ } else if (s1 <= 0) {
+ s1 = -1;
+ }
+ }
+ if (s1 >= 0) {
+ if (s1 < 0x80) { /* latin */
+ if ((filter->status & 0xff00) != 0) { /* another set is active: switch back with ESC ( B */
+ CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+ CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+ CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
+ }
+ CK((*filter->output_function)(s1, filter->data));
+ filter->status = 0;
+ } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */
+ if ((filter->status & 0xff00) != 0x100) {
+ CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+ CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+ CK((*filter->output_function)(0x49, filter->data)); /* 'I' */
+ }
+ filter->status = 0x100;
+ CK((*filter->output_function)(s1 & 0x7f, filter->data)); /* written as a 7-bit value */
+ } else if (s1 < 0x7e7f) { /* X 0208 */
+ if ((filter->status & 0xff00) != 0x200) {
+ CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+ CK((*filter->output_function)(0x24, filter->data)); /* '$' */
+ CK((*filter->output_function)(0x42, filter->data)); /* 'B' */
+ }
+ filter->status = 0x200;
+ CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data));
+ CK((*filter->output_function)(s1 & 0x7f, filter->data));
+ } else if (s1 < 0x927f) { /* UDC */
+ if ((filter->status & 0xff00) != 0x800) {
+ CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
+ CK((*filter->output_function)(0x24, filter->data)); /* '$' */
+ CK((*filter->output_function)(0x28, filter->data)); /* '(' */
+ CK((*filter->output_function)(0x3f, filter->data)); /* '?' */
+ }
+ filter->status = 0x800;
+ CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data)); /* first byte 0x7f.. is written as 0x21.. */
+ CK((*filter->output_function)(s1 & 0x7f, filter->data));
+ } /* NOTE(review): s1 >= 0x927f matches no branch, so nothing is written and nothing is reported -- confirm this is intended */
+ } else {
+ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+ CK(mbfl_filt_conv_illegal_output(c, filter));
+ }
+ }
+
+ return c;
+}
+
+/* Flush: if bits 8-15 of filter->status are set, writes ESC ( B ("back to latin"); then keeps only the low status byte. Returns 0, or -1 via CK. */
+int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter)
+{
+	static const int esc_latin[3] = { 0x1b, 0x28, 0x42 }; /* ESC ( B */
+	int i, n = ((filter->status & 0xff00) != 0) ? 3 : 0;
+
+	for (i = 0; i < n; i++) {
+		CK((*filter->output_function)(esc_latin[i], filter->data));
+	}
+	filter->status &= 0xff;
+	return 0;
+}
+
+static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter)
+{ /* Identify filter: follows the escape-sequence states in filter->status and sets filter->flag to 1 on input it does not accept; returns c. */
+retry:
+ switch (filter->status & 0xf) {
+/* case 0x00: ASCII */
+/* case 0x10: X 0201 latin (never assigned in this function: ESC ( J selects 0x00 below) */
+/* case 0x20: X 0201 kana */
+/* case 0x80: X 0208 */
+/* case 0xa0: X UDC */
+ case 0:
+ if (c == 0x1b) {
+ filter->status += 2; /* low nibble 2: ESC seen */
+ } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */
+ filter->status += 1; /* low nibble 1: second byte expected */
+ } else if (c >= 0 && c < 0x80) { /* latin, CTLs */
+ ;
+ } else { /* negative or >= 0x80 */
+ filter->flag = 1; /* bad */
+ }
+ break;
+
+/* case 0x81: X 0208 second char */
+/* case 0xa1: UDC second char */
+ case 1:
+ filter->status &= ~0xf; /* back to low nibble 0 */
+ if (c == 0x1b) {
+ goto retry; /* handled as ESC in the base state */
+ } else if (c < 0x21 || c > 0x7e) { /* bad */
+ filter->flag = 1;
+ }
+ break;
+
+ /* ESC */
+ case 2:
+ if (c == 0x24) { /* '$' */
+ filter->status++; /* low nibble 3: ESC $ */
+ } else if (c == 0x28) { /* '(' */
+ filter->status += 3; /* low nibble 5: ESC ( */
+ } else {
+ filter->flag = 1; /* bad */
+ filter->status &= ~0xf;
+ goto retry; /* c is examined again in the base state */
+ }
+ break;
+
+ /* ESC $ */
+ case 3:
+ if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
+ filter->status = 0x80; /* X 0208 selected */
+ } else if (c == 0x28) { /* '(' */
+ filter->status++; /* low nibble 4: ESC $ ( */
+ } else {
+ filter->flag = 1; /* bad */
+ filter->status &= ~0xf;
+ goto retry; /* c is examined again in the base state */
+ }
+ break;
+
+ /* ESC $ ( */
+ case 4:
+ if (c == 0x40 || c == 0x42) { /* '@' or 'B' */
+ filter->status = 0x80; /* X 0208 selected */
+ } else if (c == 0x3f) { /* '?' */
+ filter->status = 0xa0; /* UDC selected */
+ } else {
+ filter->flag = 1; /* bad */
+ filter->status &= ~0xf;
+ goto retry; /* c is examined again in the base state */
+ }
+ break;
+
+ /* ESC ( */
+ case 5:
+ if (c == 0x42) { /* 'B' */
+ filter->status = 0;
+ } else if (c == 0x4a) { /* 'J' */
+ filter->status = 0;
+ } else if (c == 0x49) { /* 'I' */
+ filter->status = 0x20; /* X 0201 kana selected */
+ } else {
+ filter->flag = 1; /* bad */
+ filter->status &= ~0xf;
+ goto retry; /* c is examined again in the base state */
+ }
+ break;
+
+ default: /* low nibble outside 0..5: reset to the initial state */
+ filter->status = 0;
+ break;
+ }
+
+ return c;
+}
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h
new file mode 100644
index 0000000000..8479a45095
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h
@@ -0,0 +1,44 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file:
+ *
+ */
+/*
+ * The source code included in this file was separated from mbfilter_ja.c
+ * by moriyoshi koizumi on 4 dec 2002.
+ *
+ */
+
+#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H
+#define MBFL_MBFILTER_ISO2022_JP_MS_H
+
+#include "mbfilter.h" /* presumably supplies mbfl_encoding, mbfl_convert_filter and the vtbl struct types used below */
+/* Encoding descriptor and filter tables for ISO-2022-JP-MS; the definitions are expected in mbfilter_iso2022_jp_ms.c. */
+extern const mbfl_encoding mbfl_encoding_2022jpms;
+extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms;
+extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; /* ISO-2022-JP-MS => wchar, going by the name */
+extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; /* wchar => ISO-2022-JP-MS, going by the name */
+/* Per-character conversion callbacks (one value c per call) and the flush callback. */
+int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c
index 3657658ba8..58336d4e28 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c
@@ -58,15 +58,6 @@ const mbfl_encoding mbfl_encoding_2022jp = {
MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
};
-const mbfl_encoding mbfl_encoding_2022jpms = {
- mbfl_no_encoding_2022jpms,
- "ISO-2022-JP-MS",
- "ISO-2022-JP-MS",
- NULL,
- NULL,
- MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE
-};
-
const struct mbfl_identify_vtbl vtbl_identify_jis = {
mbfl_no_encoding_jis,
mbfl_filt_ident_common_ctor,
@@ -81,13 +72,6 @@ const struct mbfl_identify_vtbl vtbl_identify_2022jp = {
mbfl_filt_ident_2022jp
};
-const struct mbfl_identify_vtbl vtbl_identify_2022jpms = {
- mbfl_no_encoding_2022jpms,
- mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_common_dtor,
- mbfl_filt_ident_2022jp
-};
-
const struct mbfl_convert_vtbl vtbl_jis_wchar = {
mbfl_no_encoding_jis,
mbfl_no_encoding_wchar,
@@ -124,24 +108,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp = {
mbfl_filt_conv_any_jis_flush
};
-const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = {
- mbfl_no_encoding_2022jpms,
- mbfl_no_encoding_wchar,
- mbfl_filt_conv_common_ctor,
- mbfl_filt_conv_common_dtor,
- mbfl_filt_conv_jis_wchar,
- mbfl_filt_conv_common_flush
-};
-
-const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = {
- mbfl_no_encoding_wchar,
- mbfl_no_encoding_2022jpms,
- mbfl_filt_conv_common_ctor,
- mbfl_filt_conv_common_dtor,
- mbfl_filt_conv_wchar_jis,
- mbfl_filt_conv_any_jis_flush
-};
-
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
/*
@@ -164,10 +130,12 @@ retry:
filter->status += 2;
} else if (c == 0x0e) { /* "kana in" */
filter->status = 0x20;
- CK((*filter->output_function)(c, filter->data));
} else if (c == 0x0f) { /* "kana out" */
filter->status = 0;
- CK((*filter->output_function)(c, filter->data));
+ } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */
+ CK((*filter->output_function)(0xa5, filter->data));
+ } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */
+ CK((*filter->output_function)(0x203e, filter->data));
} else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */
CK((*filter->output_function)(0xff40 + c, filter->data));
} else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */
@@ -193,34 +161,9 @@ retry:
s = (c1 - 0x21)*94 + c - 0x21;
if (filter->status == 0x80) {
if (s >= 0 && s < jisx0208_ucs_table_size) {
- if ((filter->from)->no_encoding !=
- mbfl_no_encoding_2022jpms) {
- w = jisx0208_ucs_table[s];
- }
- else {
- if ((c1 - 0x21) == 12) {
- w = cp932ext1_ucs_table[s-12*94];
- }
- else {
- if (c1 >= 0x79 && c1 <= 0x7c) {
- w = cp932ext2_ucs_table[s-(0x79-0x21)*94];
- }
- else {
w = jisx0208_ucs_table[s];
- }
- }
- }
} else {
- if ((filter->from)->no_encoding !=
- mbfl_no_encoding_2022jpms) {
- w = 0;
- } else {
- if (c1 >= 0x79 && c1 <= 0x7c) {
- w = cp932ext2_ucs_table[s-(0x79-0x21)*94];
- } else {
w = 0;
- }
- }
}
if (w <= 0) {
w = (c1 << 8) | c;
@@ -344,7 +287,7 @@ retry:
int
mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
{
- int c1, c2, s;
+ int c1, s;
s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
@@ -356,18 +299,6 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
} else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) {
s = ucs_r_jis_table[c - ucs_r_jis_table_min];
}
- if (s > 0x8080 && s < 0x10000 &&
- ((filter->to)->no_encoding == mbfl_no_encoding_2022jpms)) {
- c1 = 0;
- c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
- while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
- if (c == cp932ext2_ucs_table[c1]) {
- s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
- break;
- }
- c1++;
- }
- }
if (s <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_JIS0208) {
@@ -396,30 +327,8 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
}
if (c == 0) {
s = 0;
- } else if (s <= 0 && ((filter->to)->no_encoding ==
- mbfl_no_encoding_2022jpms)) {
+ } else if (s <= 0) {
s = -1;
- c1 = 0;
- c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
- while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
- if (c == cp932ext1_ucs_table[c1]) {
- s = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
- break;
- }
- c1++;
- }
- if (s < 0 && ((filter->to)->no_encoding ==
- mbfl_no_encoding_2022jpms)) {
- c1 = 0;
- c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
- while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
- if (c == cp932ext2_ucs_table[c1]) {
- s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
- break;
- }
- c1++;
- }
- }
}
}
if (s >= 0) {
@@ -483,7 +392,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter)
int
mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
{
- int c1, c2, s;
+ int s;
s = 0;
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) {
@@ -519,47 +428,9 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
s = 0;
} else if (s <= 0) {
s = -1;
- if ( (filter->to)->no_encoding ==
- mbfl_no_encoding_2022jpms) {
- c1 = 0;
- c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min;
- while (c1 < c2) { /* CP932 vendor ext1 (13ku) */
- if (c == cp932ext1_ucs_table[c1]) {
- s = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21);
- break;
- }
- c1++;
- }
- if ((filter->to)->no_encoding ==
- mbfl_no_encoding_2022jpms) {
- c1 = 0;
- c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
- while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
- if (c == cp932ext2_ucs_table[c1]) {
- s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
- break;
- }
- c1++;
- }
- }
- }
}
- } else if (((s >= 0x80 && s < 0x2121) &&
- (filter->to)->no_encoding != mbfl_no_encoding_2022jpms) ||
- (s > 0x8080)) {
+ } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) {
s = -1;
- if ((filter->to)->no_encoding ==
- mbfl_no_encoding_2022jpms) {
- c1 = 0;
- c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min;
- while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */
- if (c == cp932ext2_ucs_table[c1]) {
- s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21);
- break;
- }
- c1++;
- }
- }
}
if (s >= 0) {
if (s < 0x80) { /* ASCII */
@@ -570,15 +441,6 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter)
}
filter->status = 0;
CK((*filter->output_function)(s, filter->data));
- } else if (s < 0x100 && ((filter->to)->no_encoding ==
- mbfl_no_encoding_2022jpms)) { /* kana */
- if ((filter->status & 0xff00) != 0x100) {
- CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
- CK((*filter->output_function)(0x28, filter->data)); /* '(' */
- CK((*filter->output_function)(0x49, filter->data)); /* 'I' */
- }
- filter->status = 0x100;
- CK((*filter->output_function)(s & 0x7f, filter->data));
} else if (s < 0x10000) { /* X 0208 */
if ((filter->status & 0xff00) != 0x200) {
CK((*filter->output_function)(0x1b, filter->data)); /* ESC */
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.h b/ext/mbstring/libmbfl/filters/mbfilter_jis.h
index 1ba244a282..0375307263 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_jis.h
+++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.h
@@ -34,16 +34,12 @@
extern const mbfl_encoding mbfl_encoding_jis;
extern const mbfl_encoding mbfl_encoding_2022jp;
-extern const mbfl_encoding mbfl_encoding_2022jpms;
extern const struct mbfl_identify_vtbl vtbl_identify_2022jp;
-extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms;
extern const struct mbfl_identify_vtbl vtbl_identify_jis;
extern const struct mbfl_convert_vtbl vtbl_jis_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_jis;
extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar;
extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp;
-extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar;
-extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms;
int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter);
int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter);
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c
new file mode 100644
index 0000000000..9b8f450e9e
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c
@@ -0,0 +1,146 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Maksym Veremeyenko
+ *
+ * Based on mbfilter_koi8r.c code
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_koi8u.h"
+#include "unicode_table_koi8u.h"
+
+static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter);
+/* NULL-terminated list of alias names for this encoding. */
+static const char *mbfl_encoding_koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL};
+/* Encoding descriptor for KOI8-U, flagged MBFL_ENCTYPE_SBCS. */
+const mbfl_encoding mbfl_encoding_koi8u = {
+ mbfl_no_encoding_koi8u,
+ "KOI8-U",
+ "KOI8-U",
+ (const char *(*)[])&mbfl_encoding_koi8u_aliases,
+ NULL,
+ MBFL_ENCTYPE_SBCS
+};
+/* Identify vtable: common ctor/dtor plus the file-local KOI8-U identify filter. */
+const struct mbfl_identify_vtbl vtbl_identify_koi8u = {
+ mbfl_no_encoding_koi8u,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_koi8u
+};
+/* Convert vtable for wchar => KOI8-U, using the common ctor/dtor/flush. */
+const struct mbfl_convert_vtbl vtbl_wchar_koi8u = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_koi8u,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_koi8u,
+ mbfl_filt_conv_common_flush
+};
+/* Convert vtable for KOI8-U => wchar, using the common ctor/dtor/flush. */
+const struct mbfl_convert_vtbl vtbl_koi8u_wchar = {
+ mbfl_no_encoding_koi8u,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_koi8u_wchar,
+ mbfl_filt_conv_common_flush
+};
+/* CK: make the enclosing function return -1 when the wrapped call yields a negative value. */
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * koi8u => wchar
+ * Bytes below koi8u_ucs_table_min are passed on unchanged, bytes up to 0xff
+ * are mapped through koi8u_ucs_table, anything else is tagged as THROUGH.
+ */
+int
+mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter)
+{
+	int wc;
+
+	if (c < 0 || c >= 0x100) {
+		/* outside the single-byte range: hand it on with the THROUGH group bits */
+		wc = (c & MBFL_WCSGROUP_MASK) | MBFL_WCSGROUP_THROUGH;
+	} else if (c < koi8u_ucs_table_min) {
+		wc = c;
+	} else {
+		wc = koi8u_ucs_table[c - koi8u_ucs_table_min];
+		if (wc <= 0) {
+			/* table entry is 0: keep the byte, tagged with the KOI8-U plane */
+			wc = (c & MBFL_WCSPLANE_MASK) | MBFL_WCSPLANE_KOI8U;
+		}
+	}
+
+	CK((*filter->output_function)(wc, filter->data));
+
+	return c;
+}
+
+/*
+ * wchar => koi8u
+ * Values below 0x80 are taken as-is; others are searched in koi8u_ucs_table.
+ * What stays unmapped goes to the illegal handler unless illegal_mode is NONE.
+ */
+int
+mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter)
+{
+	int out = -1, idx;
+
+	if (c < 0x80) {
+		out = c;
+	} else {
+		/* reverse lookup, scanning from the last table entry down to the first */
+		for (idx = koi8u_ucs_table_len - 1; idx >= 0; idx--) {
+			if (koi8u_ucs_table[idx] == c) {
+				out = koi8u_ucs_table_min + idx;
+				break;
+			}
+		}
+		/* not in the table: unwrap a value that carries the KOI8-U plane tag */
+		if (out <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8U) {
+			out = c & MBFL_WCSPLANE_MASK;
+		}
+	}
+
+	if (out < 0) {
+		if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+			CK(mbfl_filt_conv_illegal_output(c, filter));
+		}
+		return c;
+	}
+	CK((*filter->output_function)(out, filter->data));
+	return c;
+}
+
+/* Identify filter: clears filter->flag for bytes 0x80..0xfe, sets it to 1 otherwise. */
+static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter)
+{
+	/* NOTE(review): 0xff and every byte below 0x80 set the flag although
+	 * koi8u_ucs_table covers 0x80..0xff; confirm this is intended. */
+	filter->flag = (c >= 0x80 && c < 0xff) ? 0 : 1; /* 1 = not it */
+	return c;
+}
diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h
new file mode 100644
index 0000000000..693ade3d6d
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h
@@ -0,0 +1,47 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Maksym Veremeyenko
+ *
+ * Based on mbfilter_koi8r.h code
+ *
+ */
+
+#ifndef MBFL_MBFILTER_KOI8U_H
+#define MBFL_MBFILTER_KOI8U_H
+
+#include "mbfilter.h"
+/* KOI8-U encoding descriptor and filter vtables (defined in mbfilter_koi8u.c). */
+extern const mbfl_encoding mbfl_encoding_koi8u;
+extern const struct mbfl_identify_vtbl vtbl_identify_koi8u;
+extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u;
+extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar;
+/* Per-character conversion filters; each returns c, or -1 when an output call fails. */
+int mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_KOI8U_H */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h
new file mode 100644
index 0000000000..644053cf7a
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h
@@ -0,0 +1,51 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The authors of this file: PHP3 internationalization team
+ * You can contact the primary author 金本　茂.
+ *
+ */
+
+#ifndef UNICODE_TABLE_CP1254_H
+#define UNICODE_TABLE_CP1254_H
+/* cp1254 to Unicode table, indexed by (byte - 0x80); 0xfffe marks bytes without a Unicode assignment */
+static const unsigned short cp1254_ucs_table[] = {
+	0x20ac, 0xfffe, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
+	0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffe, 0xfffe, 0xfffe,
+	0xfffe, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
+	0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffe, 0xfffe, 0x0178,
+	0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
+	0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
+	0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
+	0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
+	0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+	0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+	0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
+	0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df,
+	0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
+	0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
+	0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
+	0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff
+};
+static const int cp1254_ucs_table_min = 0x80; /* first byte value covered by the table */
+static const int cp1254_ucs_table_len = (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); /* entry count */
+static const int cp1254_ucs_table_max = 0x80 + (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); /* min + len */
+
+#endif /* UNICODE_TABLE_CP1254_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp850.h b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h
new file mode 100644
index 0000000000..6c60ae03e0
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h
@@ -0,0 +1,52 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Den V. Tsopa
+ * Adaption for CP850: D. Giffeler
+ *
+ */
+
+#ifndef UNICODE_TABLE_CP850_H
+#define UNICODE_TABLE_CP850_H
+
+/* cp850 (DOS) to Unicode table, indexed by (byte - 0x80); row comments give the first byte of each row */
+static const unsigned short cp850_ucs_table[] = {
+	/* 0x80 */ 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
+	/* 0x88 */ 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
+	/* 0x90 */ 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
+	/* 0x98 */ 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192,
+	/* 0xa0 */ 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
+	/* 0xa8 */ 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
+	/* 0xb0 */ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0,
+	/* 0xb8 */ 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510,
+	/* 0xc0 */ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
+	/* 0xc8 */ 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
+	/* 0xd0 */ 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce,
+	/* 0xd8 */ 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
+	/* 0xe0 */ 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
+	/* 0xe8 */ 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
+	/* 0xf0 */ 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
+	/* 0xf8 */ 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0
+};
+static const int cp850_ucs_table_min = 0x80;
+static const int cp850_ucs_table_len = (sizeof (cp850_ucs_table) / sizeof (cp850_ucs_table[0]));
+static const int cp850_ucs_table_max = 0x80 + (sizeof (cp850_ucs_table) / sizeof (cp850_ucs_table[0]));
+
+#endif /* UNICODE_TABLE_CP850_H */
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis.h b/ext/mbstring/libmbfl/filters/unicode_table_jis.h
index e87dad93c8..5671c4e851 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_jis.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_jis.h
@@ -36,13 +36,13 @@ static const unsigned short jisx0208_ucs_table[] = {
0xFF1F,0xFF01,0x309B,0x309C,0x00B4,0xFF40,0x00A8,0xFF3E,
0xFFE3,0xFF3F,0x30FD,0x30FE,0x309D,0x309E,0x3003,0x4EDD,
0x3005,0x3006,0x3007,0x30FC,0x2015,0x2010,0xFF0F,0xFF3C,
- 0xFF5E,0x2225,0xFF5C,0x2026,0x2025,0x2018,0x2019,0x201C,
+ 0x301C,0x2016,0xFF5C,0x2026,0x2025,0x2018,0x2019,0x201C,
0x201D,0xFF08,0xFF09,0x3014,0x3015,0xFF3B,0xFF3D,0xFF5B,
0xFF5D,0x3008,0x3009,0x300A,0x300B,0x300C,0x300D,0x300E,
- 0x300F,0x3010,0x3011,0xFF0B,0xFF0D,0x00B1,0x00D7,0x00F7,
+ 0x300F,0x3010,0x3011,0xFF0B,0x2212,0x00B1,0x00D7,0x00F7,
0xFF1D,0x2260,0xFF1C,0xFF1E,0x2266,0x2267,0x221E,0x2234,
0x2642,0x2640,0x00B0,0x2032,0x2033,0x2103,0xFFE5,0xFF04,
- 0xFFE0,0xFFE1,0xFF05,0xFF03,0xFF06,0xFF0A,0xFF20,0x00A7,
+ 0x00A2,0x00A3,0xFF05,0xFF03,0xFF06,0xFF0A,0xFF20,0x00A7,
0x2606,0x2605,0x25CB,0x25CF,0x25CE,0x25C7,
/* ku 2 */
@@ -51,7 +51,7 @@ static const unsigned short jisx0208_ucs_table[] = {
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x2208,0x220B,0x2286,0x2287,0x2282,0x2283,0x222A,
0x2229,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x2227,0x2228,0xFFE2,0x21D2,0x21D4,0x2200,0x2203,
+ 0x0000,0x2227,0x2228,0x00AC,0x21D2,0x21D4,0x2200,0x2203,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x2220,0x22A5,0x2312,0x2202,0x2207,
0x2261,0x2252,0x226A,0x226B,0x221A,0x223D,0x221D,0x2235,
@@ -1217,9 +1217,9 @@ static const unsigned short jisx0212_ucs_table[] = {
/* ku 2 */
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x02D8,0x02C7,
- 0x00B8,0x02D9,0x02DD,0x00AF,0x02DB,0x02DA,0xFF5E,0x0384,
+ 0x00B8,0x02D9,0x02DD,0x00AF,0x02DB,0x02DA,0x007E,0x0384,
0x0385,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x00A1,0xFFE4,0x00BF,0x0000,0x0000,0x0000,0x0000,
+ 0x0000,0x00A1,0x00A6,0x00BF,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
@@ -2471,7 +2471,7 @@ static const unsigned short ucs_a2_jis_table[] = {
/* 2100h */
0x0000,0x0000,0x0000,0x216E,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
- 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x2d62,0x0000,
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xA2F1,0x0000,
0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0xA2EF,0x0000,0x0000,0x0000,0x0000,0x0000,
0x0000,0x0000,0x0000,0x2272,0x0000,0x0000,0x0000,0x0000,
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h b/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h
new file mode 100644
index 0000000000..f3a4e9de63
--- /dev/null
+++ b/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h
@@ -0,0 +1,166 @@
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this part: Maksym Veremeyenko
+ *
+ */
+
+#ifndef UNICODE_TABLE_KOI8U_H
+#define UNICODE_TABLE_KOI8U_H
+
+/* KOI8-U (RFC2319) to Unicode */
+static const unsigned short koi8u_ucs_table[] = {
+ 0x2500, /* BOX DRAWINGS LIGHT HORIZONTAL */
+ 0x2502, /* BOX DRAWINGS LIGHT VERTICAL */
+ 0x250C, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */
+ 0x2510, /* BOX DRAWINGS LIGHT DOWN AND LEFT */
+ 0x2514, /* BOX DRAWINGS LIGHT UP AND RIGHT */
+ 0x2518, /* BOX DRAWINGS LIGHT UP AND LEFT */
+ 0x251C, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */
+ 0x2524, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */
+ 0x252C, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */
+ 0x2534, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */
+ 0x253C, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */
+ 0x2580, /* UPPER HALF BLOCK */
+ 0x2584, /* LOWER HALF BLOCK */
+ 0x2588, /* FULL BLOCK */
+ 0x258C, /* LEFT HALF BLOCK */
+ 0x2590, /* RIGHT HALF BLOCK */
+ 0x2591, /* LIGHT SHADE */
+ 0x2592, /* MEDIUM SHADE */
+ 0x2593, /* DARK SHADE */
+ 0x2320, /* TOP HALF INTEGRAL */
+ 0x25A0, /* BLACK SQUARE */
+ 0x2219, /* BULLET OPERATOR */
+ 0x221A, /* SQUARE ROOT */
+ 0x2248, /* ALMOST EQUAL TO */
+ 0x2264, /* LESS THAN OR EQUAL TO */
+ 0x2265, /* GREATER THAN OR EQUAL TO */
+ 0x00A0, /* NO-BREAK SPACE */
+ 0x2321, /* BOTTOM HALF INTEGRAL */
+ 0x00B0, /* DEGREE SIGN */
+ 0x00B2, /* SUPERSCRIPT TWO */
+ 0x00B7, /* MIDDLE DOT */
+ 0x00F7, /* DIVISION SIGN */
+ 0x2550, /* BOX DRAWINGS DOUBLE HORIZONTAL */
+ 0x2551, /* BOX DRAWINGS DOUBLE VERTICAL */
+ 0x2552, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE */
+ 0x0451, /* CYRILLIC SMALL LETTER IO */
+ 0x0454, /* CYRILLIC SMALL LETTER UKRAINIAN IE */
+ 0x2554, /* BOX DRAWINGS DOUBLE DOWN AND RIGHT */
+ 0x0456, /* CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */
+ 0x0457, /* CYRILLIC SMALL LETTER YI (UKRAINIAN) */
+ 0x2557, /* BOX DRAWINGS DOUBLE DOWN AND LEFT */
+ 0x2558, /* BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE */
+ 0x2559, /* BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE */
+ 0x255A, /* BOX DRAWINGS DOUBLE UP AND RIGHT */
+ 0x255B, /* BOX DRAWINGS UP SINGLE AND LEFT DOUBLE */
+ 0x0491, /* CYRILLIC SMALL LETTER GHE WITH UPTURN */
+ 0x255D, /* BOX DRAWINGS DOUBLE UP AND LEFT */
+ 0x255E, /* BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE */
+ 0x255F, /* BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE */
+ 0x2560, /* BOX DRAWINGS DOUBLE VERTICAL AND RIGHT */
+ 0x2561, /* BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE */
+ 0x0401, /* CYRILLIC CAPITAL LETTER IO */
+ 0x0404, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */
+ 0x2563, /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */
+ 0x0406, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */
+ 0x0407, /* CYRILLIC CAPITAL LETTER YI (UKRAINIAN) */
+ 0x2566, /* BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL */
+ 0x2567, /* BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE */
+ 0x2568, /* BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE */
+ 0x2569, /* BOX DRAWINGS DOUBLE UP AND HORIZONTAL */
+ 0x256A, /* BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE */
+ 0x0490, /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */
+ 0x256C, /* BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL */
+ 0x00A9, /* COPYRIGHT SIGN */
+ 0x044E, /* CYRILLIC SMALL LETTER YU */
+ 0x0430, /* CYRILLIC SMALL LETTER A */
+ 0x0431, /* CYRILLIC SMALL LETTER BE */
+ 0x0446, /* CYRILLIC SMALL LETTER TSE */
+ 0x0434, /* CYRILLIC SMALL LETTER DE */
+ 0x0435, /* CYRILLIC SMALL LETTER IE */
+ 0x0444, /* CYRILLIC SMALL LETTER EF */
+ 0x0433, /* CYRILLIC SMALL LETTER GHE */
+ 0x0445, /* CYRILLIC SMALL LETTER KHA */
+ 0x0438, /* CYRILLIC SMALL LETTER I */
+ 0x0439, /* CYRILLIC SMALL LETTER SHORT I */
+ 0x043A, /* CYRILLIC SMALL LETTER KA */
+ 0x043B, /* CYRILLIC SMALL LETTER EL */
+ 0x043C, /* CYRILLIC SMALL LETTER EM */
+ 0x043D, /* CYRILLIC SMALL LETTER EN */
+ 0x043E, /* CYRILLIC SMALL LETTER O */
+ 0x043F, /* CYRILLIC SMALL LETTER PE */
+ 0x044F, /* CYRILLIC SMALL LETTER YA */
+ 0x0440, /* CYRILLIC SMALL LETTER ER */
+ 0x0441, /* CYRILLIC SMALL LETTER ES */
+ 0x0442, /* CYRILLIC SMALL LETTER TE */
+ 0x0443, /* CYRILLIC SMALL LETTER U */
+ 0x0436, /* CYRILLIC SMALL LETTER ZHE */
+ 0x0432, /* CYRILLIC SMALL LETTER VE */
+ 0x044C, /* CYRILLIC SMALL LETTER SOFT SIGN */
+ 0x044B, /* CYRILLIC SMALL LETTER YERU */
+ 0x0437, /* CYRILLIC SMALL LETTER ZE */
+ 0x0448, /* CYRILLIC SMALL LETTER SHA */
+ 0x044D, /* CYRILLIC SMALL LETTER E */
+ 0x0449, /* CYRILLIC SMALL LETTER SHCHA */
+ 0x0447, /* CYRILLIC SMALL LETTER CHE */
+ 0x044A, /* CYRILLIC SMALL LETTER HARD SIGN */
+ 0x042E, /* CYRILLIC CAPITAL LETTER YU */
+ 0x0410, /* CYRILLIC CAPITAL LETTER A */
+ 0x0411, /* CYRILLIC CAPITAL LETTER BE */
+ 0x0426, /* CYRILLIC CAPITAL LETTER TSE */
+ 0x0414, /* CYRILLIC CAPITAL LETTER DE */
+ 0x0415, /* CYRILLIC CAPITAL LETTER IE */
+ 0x0424, /* CYRILLIC CAPITAL LETTER EF */
+ 0x0413, /* CYRILLIC CAPITAL LETTER GHE */
+ 0x0425, /* CYRILLIC CAPITAL LETTER KHA */
+ 0x0418, /* CYRILLIC CAPITAL LETTER I */
+ 0x0419, /* CYRILLIC CAPITAL LETTER SHORT I */
+ 0x041A, /* CYRILLIC CAPITAL LETTER KA */
+ 0x041B, /* CYRILLIC CAPITAL LETTER EL */
+ 0x041C, /* CYRILLIC CAPITAL LETTER EM */
+ 0x041D, /* CYRILLIC CAPITAL LETTER EN */
+ 0x041E, /* CYRILLIC CAPITAL LETTER O */
+ 0x041F, /* CYRILLIC CAPITAL LETTER PE */
+ 0x042F, /* CYRILLIC CAPITAL LETTER YA */
+ 0x0420, /* CYRILLIC CAPITAL LETTER ER */
+ 0x0421, /* CYRILLIC CAPITAL LETTER ES */
+ 0x0422, /* CYRILLIC CAPITAL LETTER TE */
+ 0x0423, /* CYRILLIC CAPITAL LETTER U */
+ 0x0416, /* CYRILLIC CAPITAL LETTER ZHE */
+ 0x0412, /* CYRILLIC CAPITAL LETTER VE */
+ 0x042C, /* CYRILLIC CAPITAL LETTER SOFT SIGN */
+ 0x042B, /* CYRILLIC CAPITAL LETTER YERU */
+ 0x0417, /* CYRILLIC CAPITAL LETTER ZE */
+ 0x0428, /* CYRILLIC CAPITAL LETTER SHA */
+ 0x042D, /* CYRILLIC CAPITAL LETTER E */
+ 0x0429, /* CYRILLIC CAPITAL LETTER SHCHA */
+ 0x0427, /* CYRILLIC CAPITAL LETTER CHE */
+ 0x042A /* CYRILLIC CAPITAL LETTER HARD SIGN */
+};
+static const int koi8u_ucs_table_min = 0x80;
+static const int koi8u_ucs_table_len = (sizeof (koi8u_ucs_table) / sizeof (unsigned short));
+static const int koi8u_ucs_table_max = 0x80 + (sizeof (koi8u_ucs_table) / sizeof (unsigned short));
+
+
+
+#endif /* UNICODE_TABLE_KOI8U_H */
+
diff --git a/ext/mbstring/libmbfl/libmbfl.dsp b/ext/mbstring/libmbfl/libmbfl.dsp
index ddb4f90032..a6d9cfc720 100644
--- a/ext/mbstring/libmbfl/libmbfl.dsp
+++ b/ext/mbstring/libmbfl/libmbfl.dsp
@@ -243,6 +243,10 @@ SOURCE=.\filters\mbfilter_koi8r.c
# End Source File
# Begin Source File
+SOURCE=.\filters\mbfilter_koi8u.c
+# End Source File
+# Begin Source File
+
SOURCE=.\filters\mbfilter_armscii8.c
# End Source File
# Begin Source File
@@ -556,6 +560,10 @@ SOURCE=.\filters\mbfilter_koi8r.h
# End Source File
# Begin Source File
+SOURCE=.\filters\mbfilter_koi8u.h
+# End Source File
+# Begin Source File
+
SOURCE=.\filters\mbfilter_armscii8.h
# End Source File
# Begin Source File
@@ -776,6 +784,10 @@ SOURCE=.\filters\unicode_table_koi8r.h
# End Source File
# Begin Source File
+SOURCE=.\filters\unicode_table_koi8u.h
+# End Source File
+# Begin Source File
+
SOURCE=.\filters\unicode_table_armscii8.h
# End Source File
# Begin Source File
diff --git a/ext/mbstring/libmbfl/libmbfl.sln b/ext/mbstring/libmbfl/libmbfl.sln
index f49f0c0d86..becef513a5 100755
--- a/ext/mbstring/libmbfl/libmbfl.sln
+++ b/ext/mbstring/libmbfl/libmbfl.sln
@@ -1,21 +1,19 @@
-Microsoft Visual Studio Solution File, Format Version 7.00
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual C++ Express 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}"
EndProject
Global
- GlobalSection(SolutionConfiguration) = preSolution
- ConfigName.0 = Debug
- ConfigName.1 = Release
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
EndGlobalSection
- GlobalSection(ProjectDependencies) = postSolution
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.ActiveCfg = Debug|Win32
+ {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.Build.0 = Debug|Win32
+ {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.ActiveCfg = Release|Win32
+ {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
- GlobalSection(ProjectConfiguration) = postSolution
- {B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32
- {B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32
- {B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32
- {B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32
- EndGlobalSection
- GlobalSection(ExtensibilityGlobals) = postSolution
- EndGlobalSection
- GlobalSection(ExtensibilityAddIns) = postSolution
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
EndGlobalSection
EndGlobal
diff --git a/ext/mbstring/libmbfl/libmbfl.vcproj b/ext/mbstring/libmbfl/libmbfl.vcproj
index 29e0af0a27..0111012d65 100755
--- a/ext/mbstring/libmbfl/libmbfl.vcproj
+++ b/ext/mbstring/libmbfl/libmbfl.vcproj
@@ -1,24 +1,50 @@
-
+
+ RootNamespace="libmbfl"
+ TargetFrameworkVersion="131072"
+ >
+ Name="Win32"
+ />
+
+
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+
+
+
+
+
+ CompileAs="0"
+ />
+ Name="VCManagedResourceCompilerTool"
+ />
+
+
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ ImportLibrary=".\Debug/mbfl.lib"
+ />
+ Name="VCALinkTool"
+ />
+ Name="VCManifestTool"
+ />
+ Name="VCXDCMakeTool"
+ />
+ Name="VCBscMakeTool"
+ />
+ Name="VCFxCopTool"
+ />
+ Name="VCAppVerifierTool"
+ />
+ Name="VCPostBuildEventTool"
+ />
+ ATLMinimizesCRunTimeLibraryUsage="false"
+ CharacterSet="2"
+ >
+
+
+
+
+
+ SuppressStartupBanner="true"
+ CompileAs="0"
+ />
+
+ Name="VCResourceCompilerTool"
+ PreprocessorDefinitions="NDEBUG"
+ Culture="1033"
+ />
+
+ RandomizedBaseAddress="1"
+ DataExecutionPrevention="0"
+ ImportLibrary=".\Release/mbfl.lib"
+ />
+ Name="VCALinkTool"
+ />
+ Name="VCManifestTool"
+ />
+ Name="VCXDCMakeTool"
+ />
+ Name="VCBscMakeTool"
+ />
+ Name="VCFxCopTool"
+ />
+ Name="VCAppVerifierTool"
+ />
+ Name="VCPostBuildEventTool"
+ />
+
+
+ Filter="vc6"
+ >
+
+
+
+
+ RelativePath=".\filters\mbfilter_7bit.c"
+ >
+ RelativePath=".\mbfl\mbfilter_8bit.c"
+ >
+ RelativePath=".\filters\mbfilter_ascii.c"
+ >
+ RelativePath=".\filters\mbfilter_base64.c"
+ >
+ RelativePath=".\filters\mbfilter_big5.c"
+ >
+ RelativePath=".\filters\mbfilter_byte2.c"
+ >
+ RelativePath=".\filters\mbfilter_byte4.c"
+ >
+ RelativePath=".\filters\mbfilter_cp1251.c"
+ >
+ RelativePath=".\filters\mbfilter_cp1252.c"
+ >
+ RelativePath=".\filters\mbfilter_cp1254.c"
+ >
+ RelativePath=".\filters\mbfilter_cp866.c"
+ >
+ RelativePath=".\filters\mbfilter_cp932.c"
+ >
+ RelativePath=".\filters\mbfilter_cp936.c"
+ >
+ RelativePath=".\filters\mbfilter_euc_cn.c"
+ >
+ RelativePath=".\filters\mbfilter_euc_jp.c"
+ >
+ RelativePath=".\filters\mbfilter_euc_jp_win.c"
+ >
+ RelativePath=".\filters\mbfilter_euc_kr.c"
+ >
+ RelativePath=".\filters\mbfilter_euc_tw.c"
+ >
+ RelativePath=".\filters\mbfilter_htmlent.c"
+ >
+ RelativePath=".\filters\mbfilter_hz.c"
+ >
+ RelativePath=".\filters\mbfilter_iso2022_kr.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_1.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_10.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_13.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_14.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_15.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_16.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_2.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_3.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_4.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_5.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_6.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_7.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_8.c"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_9.c"
+ >
+ RelativePath=".\filters\mbfilter_jis.c"
+ >
+ RelativePath=".\filters\mbfilter_koi8r.c"
+ >
+ RelativePath=".\filters\mbfilter_koi8u.c"
+ >
+ RelativePath=".\mbfl\mbfilter_pass.c"
+ >
+ RelativePath=".\filters\mbfilter_qprint.c"
+ >
+ RelativePath=".\filters\mbfilter_sjis.c"
+ >
+ RelativePath=".\filters\mbfilter_ucs2.c"
+ >
+ RelativePath=".\filters\mbfilter_ucs4.c"
+ >
+ RelativePath=".\filters\mbfilter_uhc.c"
+ >
+ RelativePath=".\filters\mbfilter_utf16.c"
+ >
+ RelativePath=".\filters\mbfilter_utf32.c"
+ >
+ RelativePath=".\filters\mbfilter_utf7.c"
+ >
+ RelativePath=".\filters\mbfilter_utf7imap.c"
+ >
+ RelativePath=".\filters\mbfilter_utf8.c"
+ >
+ RelativePath=".\filters\mbfilter_uuencode.c"
+ >
+ RelativePath=".\mbfl\mbfilter_wchar.c"
+ >
+ RelativePath=".\mbfl\mbfl_allocators.c"
+ >
+ RelativePath=".\mbfl\mbfl_convert.c"
+ >
+ RelativePath=".\mbfl\mbfl_encoding.c"
+ >
+ RelativePath=".\mbfl\mbfl_filter_output.c"
+ >
+ RelativePath=".\mbfl\mbfl_ident.c"
+ >
+ RelativePath=".\mbfl\mbfl_language.c"
+ >
+ RelativePath=".\mbfl\mbfl_memory_device.c"
+ >
+ RelativePath=".\mbfl\mbfl_string.c"
+ >
+ RelativePath=".\nls\nls_de.c"
+ >
+ RelativePath=".\nls\nls_en.c"
+ >
+ RelativePath=".\nls\nls_ja.c"
+ >
+ RelativePath=".\nls\nls_kr.c"
+ >
+ RelativePath=".\nls\nls_neutral.c"
+ >
+ RelativePath=".\nls\nls_ru.c"
+ >
+ RelativePath=".\nls\nls_uni.c"
+ >
+ RelativePath=".\nls\nls_zh.c"
+ >
+ Filter="h;hpp;hxx;hm;inl"
+ >
+ RelativePath=".\config.h.vc6"
+ >
+ Name="Debug|Win32"
+ >
+ CommandLine="copy $(InputDir)\config.h.vc6 "$(InputDir)\config.h"
"
+ Outputs="$(InputDir)\config.h"
+ />
+ Name="Release|Win32"
+ >
+ CommandLine="copy $(InputDir)\config.h.vc6 "$(InputDir)\config.h"
"
+ Outputs="$(InputDir)\config.h"
+ />
+ RelativePath=".\filters\cp932_table.h"
+ >
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ RelativePath=".\filters\mbfilter_cp1254.h"
+ >
+ RelativePath=".\filters\mbfilter_cp5022x.h"
+ >
+ RelativePath=".\filters\mbfilter_cp51932.h"
+ >
+ RelativePath=".\filters\mbfilter_cp866.h"
+ >
+ RelativePath=".\filters\mbfilter_cp932.h"
+ >
+ RelativePath=".\filters\mbfilter_cp936.h"
+ >
+ RelativePath=".\filters\mbfilter_euc_cn.h"
+ >
+ RelativePath=".\filters\mbfilter_euc_jp.h"
+ >
+ RelativePath=".\filters\mbfilter_euc_jp_win.h"
+ >
+ RelativePath=".\filters\mbfilter_euc_kr.h"
+ >
+ RelativePath=".\filters\mbfilter_euc_tw.h"
+ >
+ RelativePath=".\filters\mbfilter_htmlent.h"
+ >
+ RelativePath=".\filters\mbfilter_hz.h"
+ >
+ RelativePath=".\filters\mbfilter_iso2022_kr.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_1.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_10.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_13.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_14.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_15.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_16.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_2.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_3.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_4.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_5.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_6.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_7.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_8.h"
+ >
+ RelativePath=".\filters\mbfilter_iso8859_9.h"
+ >
+ RelativePath=".\filters\mbfilter_jis.h"
+ >
+ RelativePath=".\filters\mbfilter_koi8r.h"
+ >
+ RelativePath=".\filters\mbfilter_koi8u.h"
+ >
+ RelativePath=".\mbfl\mbfilter_pass.h"
+ >
+ RelativePath=".\filters\mbfilter_qprint.h"
+ >
+ RelativePath=".\filters\mbfilter_sjis.h"
+ >
+ RelativePath=".\filters\mbfilter_tl_jisx0201_jisx0208.h"
+ >
+ RelativePath=".\filters\mbfilter_ucs2.h"
+ >
+ RelativePath=".\filters\mbfilter_ucs4.h"
+ >
+ RelativePath=".\filters\mbfilter_uhc.h"
+ >
+ RelativePath=".\filters\mbfilter_utf16.h"
+ >
+ RelativePath=".\filters\mbfilter_utf32.h"
+ >
+ RelativePath=".\filters\mbfilter_utf7.h"
+ >
+ RelativePath=".\filters\mbfilter_utf7imap.h"
+ >
+ RelativePath=".\filters\mbfilter_utf8.h"
+ >
+ RelativePath=".\filters\mbfilter_uuencode.h"
+ >
+ RelativePath=".\mbfl\mbfilter_wchar.h"
+ >
+ RelativePath=".\mbfl\mbfl_allocators.h"
+ >
+ RelativePath=".\mbfl\mbfl_consts.h"
+ >
+ RelativePath=".\mbfl\mbfl_convert.h"
+ >
+ RelativePath=".\mbfl\mbfl_encoding.h"
+ >
+ RelativePath=".\mbfl\mbfl_filter_output.h"
+ >
+ RelativePath=".\mbfl\mbfl_ident.h"
+ >
+ RelativePath=".\mbfl\mbfl_language.h"
+ >
+ RelativePath=".\mbfl\mbfl_memory_device.h"
+ >
+ RelativePath=".\mbfl\mbfl_string.h"
+ >
+ RelativePath=".\nls\nls_de.h"
+ >
+ RelativePath=".\nls\nls_en.h"
+ >
+ RelativePath=".\nls\nls_hy.h"
+ >
+ RelativePath=".\nls\nls_ja.h"
+ >
+ RelativePath=".\nls\nls_kr.h"
+ >
+ RelativePath=".\nls\nls_neutral.h"
+ >
+ RelativePath=".\nls\nls_ru.h"
+ >
+ RelativePath=".\nls\nls_tr.h"
+ >
+ RelativePath=".\nls\nls_ua.h"
+ >
+ RelativePath=".\nls\nls_uni.h"
+ >
+ RelativePath=".\nls\nls_zh.h"
+ >
+ RelativePath=".\filters\unicode_prop.h"
+ >
+ RelativePath=".\filters\unicode_table_armscii8.h"
+ >
+ RelativePath=".\filters\unicode_table_big5.h"
+ >
+ RelativePath=".\filters\unicode_table_cns11643.h"
+ >
+ RelativePath=".\filters\unicode_table_cp1251.h"
+ >
+ RelativePath=".\filters\unicode_table_cp1252.h"
+ >
+ RelativePath=".\filters\unicode_table_cp1254.h"
+ >
+ RelativePath=".\filters\unicode_table_cp866.h"
+ >
+ RelativePath=".\filters\unicode_table_cp932_ext.h"
+ >
+ RelativePath=".\filters\unicode_table_cp936.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_10.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_13.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_14.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_15.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_16.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_2.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_3.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_4.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_5.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_6.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_7.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_8.h"
+ >
+ RelativePath=".\filters\unicode_table_iso8859_9.h"
+ >
+ RelativePath=".\filters\unicode_table_jis.h"
+ >
+ RelativePath=".\filters\unicode_table_koi8r.h"
+ >
+ RelativePath=".\filters\unicode_table_koi8u.h"
+ >
+ RelativePath=".\filters\unicode_table_uhc.h"
+ >
+ Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+ >
+ RelativePath=".\mbfl.rc"
+ >
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ext/mbstring/libmbfl/mbfl.rc b/ext/mbstring/libmbfl/mbfl.rc
index 655e61893a..9d6a0c78e5 100644
--- a/ext/mbstring/libmbfl/mbfl.rc
+++ b/ext/mbstring/libmbfl/mbfl.rc
@@ -1,7 +1,7 @@
/* $Id$ */
1 VERSIONINFO
-FILEVERSION 1,1,0,0
-PRODUCTVERSION 1,1,0,0
+FILEVERSION 1,0,2,0
+PRODUCTVERSION 1,0,2,0
FILEFLAGSMASK 0
FILEOS 0x40000
FILETYPE 1
@@ -12,12 +12,12 @@ FILETYPE 1
{
VALUE "CompanyName", "-\0"
VALUE "FileDescription", "streamable kanji code filter\0"
- VALUE "FileVersion", "1.1.0\0"
+ VALUE "FileVersion", "1.0.2\0"
VALUE "InternalName", "mbfl\0"
VALUE "LegalCopyright", "GNU Lesser Public License Version 2.0\0"
VALUE "OriginalFilename", "mbfl.dll\0"
VALUE "ProductName", "mbfl\0"
- VALUE "ProductVersion", "1.1.0\0"
+ VALUE "ProductVersion", "1.0.2\0"
}
}
}
diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.am b/ext/mbstring/libmbfl/mbfl/Makefile.am
index 25d6734541..6e662d14e5 100644
--- a/ext/mbstring/libmbfl/mbfl/Makefile.am
+++ b/ext/mbstring/libmbfl/mbfl/Makefile.am
@@ -1,12 +1,37 @@
EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk
lib_LTLIBRARIES=libmbfl.la
-libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h
+libmbfl_la_SOURCES=mbfilter.c \
+ mbfl_string.c \
+ mbfl_language.c \
+ mbfl_encoding.c \
+ mbfl_convert.c \
+ mbfl_ident.c \
+ mbfl_memory_device.c \
+ mbfl_allocators.c \
+ mbfl_filter_output.c \
+ mbfilter_pass.c \
+ mbfilter_wchar.c \
+ mbfilter_8bit.c \
+ eaw_table.h
libmbfl_filters_la=../filters/libmbfl_filters.la
libmbfl_nls_la=../nls/libmbfl_nls.la
libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la)
libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION)
libmbfl_includedir=$(includedir)/mbfl
-libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h
+libmbfl_include_HEADERS=mbfilter.h \
+ mbfl_consts.h \
+ mbfl_encoding.h \
+ mbfl_language.h \
+ mbfl_string.h \
+ mbfl_convert.h \
+ mbfl_ident.h \
+ mbfl_memory_device.h \
+ mbfl_allocators.h \
+ mbfl_defs.h \
+ mbfl_filter_output.h \
+ mbfilter_pass.h \
+ mbfilter_wchar.h \
+ mbfilter_8bit.h
mbfilter.c: eaw_table.h
diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 b/ext/mbstring/libmbfl/mbfl/Makefile.bcc32
index 5d43e6a6f4..1b43a49efe 100644
--- a/ext/mbstring/libmbfl/mbfl/Makefile.bcc32
+++ b/ext/mbstring/libmbfl/mbfl/Makefile.bcc32
@@ -1,5 +1,16 @@
!include ..\rules.mak.bcc32
-OBJS=mbfilter.obj mbfilter_8bit.obj mbfilter_pass.obj mbfilter_wchar.obj mbfl_allocators.obj mbfl_convert.obj mbfl_encoding.obj mbfl_filter_output.obj mbfl_ident.obj mbfl_language.obj mbfl_memory_device.obj mbfl_string.obj
+OBJS=mbfilter.obj \
+ mbfilter_8bit.obj \
+ mbfilter_pass.obj \
+ mbfilter_wchar.obj \
+ mbfl_allocators.obj \
+ mbfl_convert.obj \
+ mbfl_encoding.obj \
+ mbfl_filter_output.obj \
+ mbfl_ident.obj \
+ mbfl_language.obj \
+ mbfl_memory_device.obj \
+ mbfl_string.obj
all: $(OBJS)
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h
index 73b7229c22..a00c51b5f4 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h
@@ -98,6 +98,13 @@
#include "mbfl_convert.h"
#include "mbfl_ident.h"
+/*
+ * version information
+ */
+#define MBFL_VERSION_MAJOR 1
+#define MBFL_VERSION_MINOR 0
+#define MBFL_VERSION_TEENY 2
+
/*
* convert filter
*/
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h
index a87c564616..4fc8922605 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h
@@ -34,6 +34,6 @@
#include "mbfl_defs.h"
#include "mbfilter.h"
-MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit;
+extern const mbfl_encoding mbfl_encoding_8bit;
#endif /* MBFL_MBFILTER_8BIT_H */
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h
index 49d169c668..087aa2c3be 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h
@@ -33,8 +33,8 @@
#include "mbfl_defs.h"
#include "mbfilter.h"
-MBFLAPI extern const mbfl_encoding mbfl_encoding_pass;
-MBFLAPI extern const struct mbfl_convert_vtbl vtbl_pass;
+extern const mbfl_encoding mbfl_encoding_pass;
+extern const struct mbfl_convert_vtbl vtbl_pass;
MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter);
diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h
index 9e9396a77f..24bf7473c1 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h
@@ -34,6 +34,6 @@
#include "mbfl_defs.h"
#include "mbfilter.h"
-MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar;
+extern const mbfl_encoding mbfl_encoding_wchar;
#endif /* MBFL_MBFILTER_WCHAR_H */
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
index 17e00dd595..725a674b39 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c
@@ -53,15 +53,18 @@
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
+#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
+#include "filters/mbfilter_koi8u.h"
#include "filters/mbfilter_cp866.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp1251.h"
#include "filters/mbfilter_cp1252.h"
+#include "filters/mbfilter_cp1254.h"
#include "filters/mbfilter_iso8859_1.h"
#include "filters/mbfilter_iso8859_2.h"
#include "filters/mbfilter_iso8859_3.h"
@@ -140,8 +143,12 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = {
&vtbl_wchar_cp866,
&vtbl_koi8r_wchar,
&vtbl_wchar_koi8r,
+ &vtbl_koi8u_wchar,
+ &vtbl_wchar_koi8u,
&vtbl_cp1252_wchar,
&vtbl_wchar_cp1252,
+ &vtbl_cp1254_wchar,
+ &vtbl_wchar_cp1254,
&vtbl_ascii_wchar,
&vtbl_wchar_ascii,
&vtbl_8859_1_wchar,
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
index 2e5b4abaf8..76956f0530 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c
@@ -59,15 +59,18 @@
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_jis.h"
+#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
+#include "filters/mbfilter_koi8u.h"
#include "filters/mbfilter_cp866.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp1251.h"
#include "filters/mbfilter_cp1252.h"
+#include "filters/mbfilter_cp1254.h"
#include "filters/mbfilter_iso8859_1.h"
#include "filters/mbfilter_iso8859_2.h"
#include "filters/mbfilter_iso8859_3.h"
@@ -156,6 +159,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_2022jp,
&mbfl_encoding_2022jpms,
&mbfl_encoding_cp1252,
+ &mbfl_encoding_cp1254,
&mbfl_encoding_8859_1,
&mbfl_encoding_8859_2,
&mbfl_encoding_8859_3,
@@ -181,6 +185,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = {
&mbfl_encoding_cp1251,
&mbfl_encoding_cp866,
&mbfl_encoding_koi8r,
+ &mbfl_encoding_koi8u,
&mbfl_encoding_armscii8,
&mbfl_encoding_cp850,
NULL
@@ -197,16 +202,16 @@ mbfl_name2encoding(const char *name)
return NULL;
}
- i = 0;
- while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
+ i = 0;
+ while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){
if (strcasecmp(encoding->name, name) == 0) {
return encoding;
}
}
- /* serch MIME charset name */
- i = 0;
- while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
+ /* search MIME charset name */
+ i = 0;
+ while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
if (encoding->mime_name != NULL) {
if (strcasecmp(encoding->mime_name, name) == 0) {
return encoding;
@@ -214,12 +219,12 @@ mbfl_name2encoding(const char *name)
}
}
- /* serch aliases */
- i = 0;
- while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
+ /* search aliases */
+ i = 0;
+ while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) {
if (encoding->aliases != NULL) {
- j = 0;
- while ((*encoding->aliases)[j] != NULL) {
+ j = 0;
+ while ((*encoding->aliases)[j] != NULL) {
if (strcasecmp((*encoding->aliases)[j], name) == 0) {
return encoding;
}
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
index 351a217170..2599e1107e 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h
@@ -75,6 +75,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_2022jp,
mbfl_no_encoding_2022jpms,
mbfl_no_encoding_cp1252,
+ mbfl_no_encoding_cp1254,
mbfl_no_encoding_8859_1,
mbfl_no_encoding_8859_2,
mbfl_no_encoding_8859_3,
@@ -99,6 +100,7 @@ enum mbfl_no_encoding {
mbfl_no_encoding_cp1251,
mbfl_no_encoding_cp866,
mbfl_no_encoding_koi8r,
+ mbfl_no_encoding_koi8u,
mbfl_no_encoding_8859_16,
mbfl_no_encoding_armscii8,
mbfl_no_encoding_cp850,
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c
index ade0f2a1ab..4f3bd5c58d 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c
@@ -51,15 +51,19 @@
#include "filters/mbfilter_iso2022_kr.h"
#include "filters/mbfilter_sjis.h"
#include "filters/mbfilter_jis.h"
+#include "filters/mbfilter_iso2022_jp_ms.h"
#include "filters/mbfilter_euc_jp.h"
#include "filters/mbfilter_euc_jp_win.h"
#include "filters/mbfilter_ascii.h"
#include "filters/mbfilter_koi8r.h"
+#include "filters/mbfilter_koi8u.h"
#include "filters/mbfilter_cp866.h"
#include "filters/mbfilter_cp932.h"
#include "filters/mbfilter_cp936.h"
#include "filters/mbfilter_cp1251.h"
#include "filters/mbfilter_cp1252.h"
+#include "filters/mbfilter_cp1254.h"
+#include "filters/mbfilter_cp51932.h"
#include "filters/mbfilter_iso8859_1.h"
#include "filters/mbfilter_iso8859_2.h"
#include "filters/mbfilter_iso8859_3.h"
@@ -108,6 +112,7 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_jis,
&vtbl_identify_2022jp,
&vtbl_identify_2022jpms,
+ &vtbl_identify_cp51932,
&vtbl_identify_euccn,
&vtbl_identify_cp936,
&vtbl_identify_hz,
@@ -119,7 +124,9 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = {
&vtbl_identify_cp1251,
&vtbl_identify_cp866,
&vtbl_identify_koi8r,
+ &vtbl_identify_koi8u,
&vtbl_identify_cp1252,
+ &vtbl_identify_cp1254,
&vtbl_identify_8859_1,
&vtbl_identify_8859_2,
&vtbl_identify_8859_3,
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.c b/ext/mbstring/libmbfl/mbfl/mbfl_language.c
index aaeebbc8e8..4dd9726362 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_language.c
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_language.c
@@ -57,6 +57,7 @@
#include "nls/nls_uni.h"
#include "nls/nls_de.h"
#include "nls/nls_ru.h"
+#include "nls/nls_ua.h"
#include "nls/nls_en.h"
#include "nls/nls_hy.h"
#include "nls/nls_tr.h"
@@ -77,6 +78,7 @@ static const mbfl_language *mbfl_language_ptr_table[] = {
&mbfl_language_english,
&mbfl_language_german,
&mbfl_language_russian,
+ &mbfl_language_ukrainian,
&mbfl_language_armenian,
&mbfl_language_turkish,
&mbfl_language_neutral,
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.h b/ext/mbstring/libmbfl/mbfl/mbfl_language.h
index caf1d80940..af42a010cf 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_language.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_language.h
@@ -57,6 +57,7 @@ enum mbfl_no_language {
mbfl_no_language_simplified_chinese, /* zh-cn */
mbfl_no_language_traditional_chinese, /* zh-tw */
mbfl_no_language_russian, /* ru */
+ mbfl_no_language_ukrainian, /* ua */
mbfl_no_language_armenian, /* hy */
mbfl_no_language_turkish, /* tr */
mbfl_no_language_max
diff --git a/ext/mbstring/libmbfl/nls/Makefile.am b/ext/mbstring/libmbfl/nls/Makefile.am
index ca81f902c8..454a07c638 100644
--- a/ext/mbstring/libmbfl/nls/Makefile.am
+++ b/ext/mbstring/libmbfl/nls/Makefile.am
@@ -2,4 +2,25 @@ EXTRA_DIST=Makefile.bcc32
noinst_LTLIBRARIES=libmbfl_nls.la
INCLUDES=-I../mbfl
libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h
+libmbfl_nls_la_SOURCES=nls_ja.c \
+ nls_de.c \
+ nls_en.c \
+ nls_hy.c \
+ nls_tr.c \
+ nls_kr.c \
+ nls_ru.c \
+ nls_ua.c \
+ nls_zh.c \
+ nls_uni.c \
+ nls_neutral.c \
+ nls_ja.h \
+ nls_de.h \
+ nls_en.h \
+ nls_hy.h \
+ nls_tr.h \
+ nls_kr.h \
+ nls_ru.h \
+ nls_ua.h \
+ nls_zh.h \
+ nls_uni.h \
+ nls_neutral.h
diff --git a/ext/mbstring/libmbfl/nls/Makefile.bcc32 b/ext/mbstring/libmbfl/nls/Makefile.bcc32
index 444e88c52b..dea8689cb1 100644
--- a/ext/mbstring/libmbfl/nls/Makefile.bcc32
+++ b/ext/mbstring/libmbfl/nls/Makefile.bcc32
@@ -1,6 +1,16 @@
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I..\mbfl
-OBJS=nls_ja.obj nls_de.obj nls_en.obj nls_hy.obj nls_kr.obj nls_ru.obj nls_zh.obj nls_uni.obj nls_neutral.obj
+OBJS=nls_ja.obj \
+ nls_de.obj \
+ nls_en.obj \
+ nls_hy.obj \
+ nls_tr.obj \
+ nls_kr.obj \
+ nls_ru.obj \
+ nls_ua.obj \
+ nls_zh.obj \
+ nls_uni.obj \
+ nls_neutral.obj
all: $(OBJS)
diff --git a/ext/mbstring/libmbfl/nls/nls_ua.c b/ext/mbstring/libmbfl/nls/nls_ua.c
new file mode 100644
index 0000000000..85fe9b49f9
--- /dev/null
+++ b/ext/mbstring/libmbfl/nls/nls_ua.c
@@ -0,0 +1,22 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+
+#include "mbfilter.h"
+#include "nls_ua.h"
+/* Ukrainian language entry; mbfl_language.c lists it in mbfl_language_ptr_table. */
+const mbfl_language mbfl_language_ukrainian = {
+ mbfl_no_language_ukrainian,
+ "Ukrainian",
+ "ua",
+ NULL, /* NOTE(review): presumably the alias list - confirm against mbfl_language in mbfl_language.h */
+ mbfl_no_encoding_koi8u,
+ mbfl_no_encoding_qprint,
+ mbfl_no_encoding_8bit
+};
diff --git a/ext/mbstring/libmbfl/nls/nls_ua.h b/ext/mbstring/libmbfl/nls/nls_ua.h
new file mode 100644
index 0000000000..0efa8d1f08
--- /dev/null
+++ b/ext/mbstring/libmbfl/nls/nls_ua.h
@@ -0,0 +1,9 @@
+#ifndef MBFL_NLS_UA_H
+#define MBFL_NLS_UA_H
+
+/* Declares the Ukrainian language entry that is defined in nls_ua.c. */
+#include "mbfilter.h"
+
+extern const mbfl_language mbfl_language_ukrainian;
+
+#endif /* MBFL_NLS_UA_H */
diff --git a/ext/mbstring/libmbfl/tests/Makefile.am b/ext/mbstring/libmbfl/tests/Makefile.am
new file mode 100644
index 0000000000..8e857bc22e
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/Makefile.am
@@ -0,0 +1,10 @@
+SUBDIRS=conv_encoding.tests conv_kana.tests strwidth.tests strcut.tests
+noinst_PROGRAMS=conv_encoding conv_kana strwidth strcut
+conv_encoding_SOURCES=conv_encoding.c
+conv_encoding_LDADD=../mbfl/libmbfl.la
+conv_kana_SOURCES=conv_kana.c
+conv_kana_LDADD=../mbfl/libmbfl.la
+strwidth_SOURCES=strwidth.c
+strwidth_LDADD=../mbfl/libmbfl.la
+strcut_SOURCES=strcut.c
+strcut_LDADD=../mbfl/libmbfl.la
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.c b/ext/mbstring/libmbfl/tests/conv_encoding.c
new file mode 100644
index 0000000000..9769964743
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_encoding.c
@@ -0,0 +1,104 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+/* Print the ptr->len bytes of ptr->val as "%xx" hex escapes, then " (len)" and a newline. */
+static void hexdump(const mbfl_string *ptr)
+{
+ unsigned int i;
+
+ for (i = 0; i < ptr->len; i++) {
+ printf("%%%02x", ptr->val[i]);
+ }
+
+ printf(" (%u)\n", ptr->len);
+}
+/* Usage: conv_encoding lang to_encoding from_encoding. Converts stdin line by line and hexdumps each result. */
+int main(int argc, char **argv)
+{
+ enum mbfl_no_encoding from_encoding, to_encoding;
+ enum mbfl_no_language no_language;
+ mbfl_buffer_converter *convd = NULL;
+ mbfl_memory_device dev;
+ mbfl_string string, result, *ret;
+ int final = 0; /* becomes 1 once EOF is read; ends the outer loop */
+ int state = 0; /* 1 after a CR, so that the LF of a CR LF pair is skipped */
+
+ if (argc < 4) {
+ fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ if ((no_language = mbfl_name2no_language(argv[1])) ==
+ mbfl_no_language_invalid) {
+ printf("Unsupported NLS: %s\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if ((to_encoding = mbfl_name2no_encoding(argv[2])) ==
+ mbfl_no_encoding_invalid) {
+ printf("Unsupported encoding: %s\n", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ if ((from_encoding = mbfl_name2no_encoding(argv[3])) ==
+ mbfl_no_encoding_invalid) {
+ printf("Unsupported encoding: %s\n", argv[3]);
+ return EXIT_FAILURE;
+ }
+
+ convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); /* NOTE(review): convd is not checked for NULL before use */
+
+ do {
+ mbfl_memory_device_init(&dev, 0, 4096);
+ mbfl_string_init_set(&string, no_language, from_encoding);
+
+ for (;;) {
+ const int c = fgetc(stdin);
+
+ if (c == EOF) {
+ final = 1;
+ break;
+ } else if (c == 10) { /* LF */
+ if (state == 1) {
+ state = 0;
+ continue;
+ }
+ break;
+ } else if (c == 13) { /* CR */
+ state = 1; /* NOTE(review): cleared only by an LF, so after a bare CR the next LF is skipped even if not adjacent */
+ break;
+ }
+
+ if (dev.pos >= dev.length) {
+ if (dev.length + dev.allocsz < dev.length) { /* the grown size would wrap around */
+ printf("Unable to allocate memory\n");
+ return EXIT_FAILURE;
+ }
+
+ mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+ dev.allocsz);
+ }
+
+ dev.buffer[dev.pos++] = (unsigned char)c;
+ }
+
+ mbfl_memory_device_result(&dev, &string);
+ mbfl_string_init_set(&result, no_language, to_encoding);
+ ret = mbfl_buffer_converter_feed_result(convd, &string, &result); /* NOTE(review): ret is assigned but never examined */
+ hexdump(&result);
+ mbfl_string_clear(&result);
+ mbfl_string_clear(&string);
+ } while (!final);
+
+ mbfl_buffer_converter_delete(convd);
+
+ return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp
new file mode 100644
index 0000000000..0e63ef11b7
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp
@@ -0,0 +1,33 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese CP50220raw eucJP-win
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "81 - 87ku"
+send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r"
+expect {
+ "%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+ "%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ "%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "Î"
+expect {
+ "%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test }
+}
+
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp
new file mode 100644
index 0000000000..882953f71b
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS EUC-JP
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+ "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+ "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "Î"
+expect {
+ "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp
new file mode 100644
index 0000000000..e51b5e4d6e
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS UTF-8
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+ "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "æ¥æ¬èªãã¹ã\r"
+expect {
+ "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "ï¼ï¼ï¼ï¼ï¼ï¼ï¼ï¼ï¼ï¼\r"
+expect {
+ "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "ã"
+expect {
+ "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/conv_kana.c b/ext/mbstring/libmbfl/tests/conv_kana.c
new file mode 100644
index 0000000000..c12fdc2f9b
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/conv_kana.c
@@ -0,0 +1,147 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+/* Print the ptr->len bytes of ptr->val as "%xx" hex escapes, then " (len)" and a newline. */
+static void hexdump(const mbfl_string *ptr)
+{
+ unsigned int i;
+
+ for (i = 0; i < ptr->len; i++) {
+ printf("%%%02x", ptr->val[i]);
+ }
+
+ printf(" (%u)\n", ptr->len);
+}
+/* Usage: conv_kana encoding flags. Runs mbfl_ja_jp_hantozen() on each stdin line and hexdumps the result. */
+int main(int argc, char **argv)
+{
+ enum mbfl_no_encoding no_enc;
+ const enum mbfl_no_language no_lang = mbfl_no_language_japanese;
+ mbfl_memory_device dev;
+ mbfl_string string, result;
+ int final = 0; /* becomes 1 once EOF is read; ends the outer loop */
+ int state = 0; /* 1 after a CR, so that the LF of a CR LF pair is skipped */
+ int mode = 0; /* bitmask assembled from the flag letters in argv[2] */
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s encoding flags\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ if ((no_enc = mbfl_name2no_encoding(argv[1])) ==
+ mbfl_no_encoding_invalid) {
+ printf("Unsupported encoding: %s\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ {
+ const char *p;
+
+ for (p= argv[2] + strlen(argv[2]); p > argv[2]; ) { /* walk the flag string from its last character to its first */
+ switch (*(--p)) { /* letters without a case below are silently ignored */
+ case 'A':
+ mode |= 0x1;
+ break;
+ case 'a':
+ mode |= 0x10;
+ break;
+ case 'R':
+ mode |= 0x2;
+ break;
+ case 'r':
+ mode |= 0x20;
+ break;
+ case 'N':
+ mode |= 0x4;
+ break;
+ case 'n':
+ mode |= 0x40;
+ break;
+ case 'S':
+ mode |= 0x8;
+ break;
+ case 's':
+ mode |= 0x80;
+ break;
+ case 'K':
+ mode |= 0x100;
+ break;
+ case 'k':
+ mode |= 0x1000;
+ break;
+ case 'H':
+ mode |= 0x200;
+ break;
+ case 'h':
+ mode |= 0x2000;
+ break;
+ case 'V':
+ mode |= 0x800;
+ break;
+ case 'C':
+ mode |= 0x10000;
+ break;
+ case 'c':
+ mode |= 0x20000;
+ break;
+ case 'M':
+ mode |= 0x100000;
+ break;
+ case 'm':
+ mode |= 0x200000;
+ break;
+ }
+ }
+ }
+
+ do {
+ mbfl_memory_device_init(&dev, 0, 4096);
+ mbfl_string_init_set(&string, no_lang, no_enc);
+
+ for (;;) {
+ const int c = fgetc(stdin);
+
+ if (c == EOF) {
+ final = 1;
+ break;
+ } else if (c == 10) { /* LF */
+ if (state == 1) {
+ state = 0;
+ continue;
+ }
+ break;
+ } else if (c == 13) { /* CR */
+ state = 1; /* NOTE(review): cleared only by an LF, so after a bare CR the next LF is skipped even if not adjacent */
+ break;
+ }
+
+ if (dev.pos >= dev.length) {
+ if (dev.length + dev.allocsz < dev.length) { /* the grown size would wrap around */
+ printf("Unable to allocate memory\n");
+ return EXIT_FAILURE;
+ }
+
+ mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+ dev.allocsz);
+ }
+
+ dev.buffer[dev.pos++] = (unsigned char)c;
+ }
+
+ mbfl_memory_device_result(&dev, &string);
+ mbfl_ja_jp_hantozen(&string, &result, mode); /* NOTE(review): return value ignored; result is not initialised in this function before the call */
+ hexdump(&result);
+ mbfl_string_clear(&result);
+ mbfl_string_clear(&string);
+ } while (!final);
+
+ return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strcut.c b/ext/mbstring/libmbfl/tests/strcut.c
new file mode 100644
index 0000000000..2d6a873205
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strcut.c
@@ -0,0 +1,113 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "mbfl/mbfilter.h"
+/* Print the ptr->len bytes of ptr->val as "%xx" hex escapes, then " (len)" and a newline. */
+static void hexdump(const mbfl_string *ptr)
+{
+ unsigned int i;
+
+ for (i = 0; i < ptr->len; i++) {
+ printf("%%%02x", ptr->val[i]);
+ }
+
+ printf(" (%u)\n", ptr->len);
+}
+/* Usage: strcut lang encoding offset length. Applies mbfl_strcut() to each stdin line and hexdumps the cut. */
+int main(int argc, char **argv)
+{
+ enum mbfl_no_encoding no_encoding;
+ enum mbfl_no_language no_language;
+ mbfl_memory_device dev;
+ mbfl_string string;
+ int offset, length;
+ int final = 0; /* becomes 1 once EOF is read; ends the outer loop */
+ int state = 0; /* 1 after a CR, so that the LF of a CR LF pair is skipped */
+
+ if (argc < 5) {
+ fprintf(stderr, "Usage: %s lang encoding offset length\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ if ((no_language = mbfl_name2no_language(argv[1])) ==
+ mbfl_no_language_invalid) {
+ printf("Unsupported NLS: %s\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if ((no_encoding = mbfl_name2no_encoding(argv[2])) ==
+ mbfl_no_encoding_invalid) {
+ printf("Unsupported encoding: %s\n", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ errno = 0; /* cleared once; both strtol calls below are judged by errno alone */
+ offset = strtol(argv[3], NULL, 10); /* NOTE(review): no end pointer, so non-numeric text yields 0 without error; long is narrowed to int */
+ if (errno) {
+ printf("Invalid offset: %s\n", argv[3]);
+ return EXIT_FAILURE;
+ }
+
+ length = strtol(argv[4], NULL, 10); /* NOTE(review): same validation gap as for offset */
+ if (errno) {
+ printf("Invalid length: %s\n", argv[4]);
+ return EXIT_FAILURE;
+ }
+
+
+ do {
+ mbfl_string result;
+
+ mbfl_memory_device_init(&dev, 0, 4096);
+ mbfl_string_init_set(&string, no_language, no_encoding);
+
+ for (;;) {
+ const int c = fgetc(stdin);
+
+ if (c == EOF) {
+ final = 1;
+ break;
+ } else if (c == 10) { /* LF */
+ if (state == 1) {
+ state = 0;
+ continue;
+ }
+ break;
+ } else if (c == 13) { /* CR */
+ state = 1; /* NOTE(review): cleared only by an LF, so after a bare CR the next LF is skipped even if not adjacent */
+ break;
+ }
+
+ if (dev.pos >= dev.length) {
+ if (dev.length + dev.allocsz < dev.length) { /* the grown size would wrap around */
+ printf("Unable to allocate memory\n");
+ return EXIT_FAILURE;
+ }
+
+ mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+ dev.allocsz);
+ }
+
+ dev.buffer[dev.pos++] = (unsigned char)c;
+ }
+
+ mbfl_memory_device_result(&dev, &string);
+ if (mbfl_strcut(&string, &result, offset, length)) {
+ hexdump(&result);
+ mbfl_string_clear(&result);
+ } else {
+ printf("***ERROR***\n"); /* printed whenever mbfl_strcut() returns a false value */
+ }
+ mbfl_string_clear(&string);
+ } while (!final);
+
+ return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp
new file mode 100644
index 0000000000..f203bbf5f0
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp
@@ -0,0 +1,129 @@
+#!/usr/bin/expect -f
+proc begin_strcut_test {_from _length} {
+    # Start tests/strcut for one offset/length pair; timeout is global so the "set" below is seen by later expect calls.
+    global spawn_id from length timeout
+    set from $_from
+    set length $_length
+    spawn tests/strcut Japanese "ISO-2022-JP" $_from $_length
+    set timeout 10
+    # Any other complete output line fails the test currently named by $test.
+    expect_after {
+        "\[^\r\n\]*\r\n" { fail $test }
+    }
+}
+
+begin_strcut_test -1 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 2 -1
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 3 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "%74%74 (2)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+ -ex " (0)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 5 8
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "%65%73%74 (3)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+sleep 1
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+ -ex "%1b%24%42%25%39%1b%28%42 (8)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 1 15
+
+set test "asciish characters ($from, $length)"
+send "testestestestestes\r"
+expect {
+ "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "\x1b\$B%F%9%H%F%9%H\x1b(B\r"
+sleep 1
+expect {
+ -ex "%1b%24%42%25%46%25%39%25%48%25%46%1b%28%42 (14)\r\n" { pass $test }
+}
+close
+begin_strcut_test 8 20
+
+set test "non-asciish characters (2) ($from, $length)"
+send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
+sleep 1
+expect {
+ -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" {
+ pass $test
+ }
+}
+
+begin_strcut_test 8 21
+
+set test "non-asciish characters (2) ($from, $length)"
+sleep 1
+send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
+expect {
+ -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" {
+ pass $test
+ }
+}
+
+begin_strcut_test 11 17
+
+set test "non-asciish characters (2) ($from, $length)"
+sleep 1
+send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r"
+expect {
+ -ex "%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%1b%28%42 (17)\r\n" {
+ pass $test
+ }
+}
+
+
+# vim: sts=4 sw=4 ts=4 et
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp b/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp
new file mode 100644
index 0000000000..8ad6f95635
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp
@@ -0,0 +1,91 @@
+#!/usr/bin/expect -f
+proc begin_strcut_test {_from _length} {
+    # Start tests/strcut for one offset/length pair; timeout is global so the "set" below is seen by later expect calls.
+    global spawn_id from length timeout
+    set from $_from
+    set length $_length
+    spawn tests/strcut Japanese EUC-JP $_from $_length
+    set timeout 1
+    # Any other complete output line fails the test currently named by $test.
+    expect_after {
+        "\[^\r\n\]*\r\n" { fail $test }
+    }
+}
+
+begin_strcut_test -1 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 2 -1
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 3 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "%74%74 (2)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+ -ex "%a5%b9 (2)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 5 8
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "%65%73%74 (3)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+ -ex "%a5%c8%a5%c6%a5%b9%a5%c8 (8)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 1 15
+
+set test "asciish characters ($from, $length)"
+send "testestestestestes\r"
+expect {
+ "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "¥Æ¥¹¥È¥Æ¥¹¥È\r"
+expect {
+ -ex "%a5%c6%a5%b9%a5%c8%a5%c6%a5%b9%a5%c8 (12)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 sw=4 ts=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp b/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp
new file mode 100644
index 0000000000..5104bf1905
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp
@@ -0,0 +1,91 @@
+#!/usr/bin/expect -f
+proc begin_strcut_test {_from _length} {
+    # Spawn tests/strcut for UTF-8 and publish the arguments as globals.
+    # timeout must be global: a proc-local "set timeout" is lost on return.
+    global spawn_id from length timeout
+    set from $_from
+    set length $_length
+    spawn tests/strcut Japanese UTF-8 $_from $_length
+    set timeout 1
+    expect_after {
+        "\[^\r\n\]*\r\n" { fail $test }
+    }
+}
+
+begin_strcut_test -1 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "ãã¹ããã¹ã\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 2 -1
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "ãã¹ããã¹ã\r"
+expect {
+ -ex "***ERROR***\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 3 2
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "%74%74 (2)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "ãã¹ããã¹ã\r"
+expect {
+ -ex "(0)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 5 8
+
+set test "asciish characters ($from, $length)"
+send "testtest\r"
+expect {
+ -ex "%65%73%74 (3)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "ãã¹ããã¹ã\r"
+expect {
+ -ex "%e3%82%b9%e3%83%88 (6)\r\n" { pass $test }
+}
+
+close
+begin_strcut_test 1 15
+
+set test "asciish characters ($from, $length)"
+send "testestestestestes\r"
+expect {
+ -ex "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test }
+}
+
+set test "non-asciish characters ($from, $length)"
+send "ãã¹ããã¹ã\r"
+expect {
+ -ex "%e3%83%86%e3%82%b9%e3%83%88%e3%83%86%e3%82%b9 (15)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 sw=4 ts=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c
new file mode 100644
index 0000000000..9769964743
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c
@@ -0,0 +1,104 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+/* Print every byte of *ptr as "%xx", then " (<len>)" and a newline. */
+static void hexdump(const mbfl_string *ptr)
+{
+    unsigned int pos = 0;
+    while (pos < ptr->len) {
+        printf("%%%02x", ptr->val[pos]);
+        ++pos;
+    }
+    printf(" (%u)\n", ptr->len);
+}
+
+/* Hex-dump every line read from stdin after converting it from the
+ * encoding named by argv[3] to the one named by argv[2].  A line ends at
+ * LF, CR, CR LF or EOF. */
+int main(int argc, char **argv)
+{
+    enum mbfl_no_encoding from_encoding, to_encoding;
+    enum mbfl_no_language no_language;
+    mbfl_buffer_converter *convd = NULL;
+    mbfl_memory_device dev;
+    mbfl_string string, result;
+    int final = 0;
+    int state = 0; /* 1 only while the previous byte was a CR */
+
+    if (argc < 4) {
+        fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    no_language = mbfl_name2no_language(argv[1]);
+    if (no_language == mbfl_no_language_invalid) {
+        printf("Unsupported NLS: %s\n", argv[1]);
+        return EXIT_FAILURE;
+    }
+
+    to_encoding = mbfl_name2no_encoding(argv[2]);
+    if (to_encoding == mbfl_no_encoding_invalid) {
+        printf("Unsupported encoding: %s\n", argv[2]);
+        return EXIT_FAILURE;
+    }
+
+    from_encoding = mbfl_name2no_encoding(argv[3]);
+    if (from_encoding == mbfl_no_encoding_invalid) {
+        printf("Unsupported encoding: %s\n", argv[3]);
+        return EXIT_FAILURE;
+    }
+
+    convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+
+    do {
+        mbfl_memory_device_init(&dev, 0, 4096);
+        mbfl_string_init_set(&string, no_language, from_encoding);
+
+        for (;;) {
+            const int c = fgetc(stdin);
+
+            if (c == EOF) {
+                final = 1;
+                break;
+            } else if (c == 10) {
+                if (state == 1) {
+                    state = 0; /* LF of a CR LF pair: line already emitted */
+                    continue;
+                }
+                break;
+            } else if (c == 13) {
+                state = 1;
+                break;
+            }
+            state = 0; /* a CR not directly followed by LF must not eat a later LF */
+
+            if (dev.pos >= dev.length) {
+                if (dev.length + dev.allocsz < dev.length) {
+                    printf("Unable to allocate memory\n");
+                    return EXIT_FAILURE;
+                }
+                mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz, dev.allocsz);
+            }
+            dev.buffer[dev.pos++] = (unsigned char)c;
+        }
+
+        mbfl_memory_device_result(&dev, &string);
+        mbfl_string_init_set(&result, no_language, to_encoding);
+        mbfl_buffer_converter_feed_result(convd, &string, &result);
+        hexdump(&result);
+        mbfl_string_clear(&result);
+        mbfl_string_clear(&string);
+    } while (!final);
+
+    mbfl_buffer_converter_delete(convd);
+    return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp
new file mode 100644
index 0000000000..0e63ef11b7
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp
@@ -0,0 +1,33 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese CP50220raw eucJP-win
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "81 - 87ku"
+send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r"
+expect {
+ "%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+ "%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ "%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test }
+}
+
+set test "single row-13 character (JIS 0x2d42)"
+send "Î"
+expect {
+ "%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test }
+}
+
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp
new file mode 100644
index 0000000000..882953f71b
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS EUC-JP
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+ "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "ÆüËÜ¸ì¥Æ¥¹¥È\r"
+expect {
+ "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "character without a Shift_JIS mapping"
+send "Î"
+expect {
+ "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp
new file mode 100644
index 0000000000..e51b5e4d6e
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp
@@ -0,0 +1,35 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese Shift_JIS UTF-8
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+ "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "æ¥æ¬èªãã¹ã\r"
+expect {
+ "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "ï¼ï¼ï¼ï¼ï¼ï¼ï¼ï¼ï¼ï¼\r"
+expect {
+ "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "character without a Shift_JIS mapping"
+send "ã"
+expect {
+ "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c
new file mode 100644
index 0000000000..c12fdc2f9b
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c
@@ -0,0 +1,147 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+/* Print every byte of *ptr as "%xx", then " (<len>)" and a newline. */
+static void hexdump(const mbfl_string *ptr)
+{
+    unsigned int pos = 0;
+    while (pos < ptr->len) {
+        printf("%%%02x", ptr->val[pos]);
+        ++pos;
+    }
+    printf(" (%u)\n", ptr->len);
+}
+
+/* Hex-dump each stdin line after mbfl_ja_jp_hantozen() in encoding argv[1];
+ * argv[2] is a string of mode flag letters.  Lines end at LF, CR, CR LF or EOF. */
+int main(int argc, char **argv)
+{
+    enum mbfl_no_encoding no_enc;
+    const enum mbfl_no_language no_lang = mbfl_no_language_japanese;
+    mbfl_memory_device dev;
+    mbfl_string string, result;
+    int final = 0;
+    int state = 0; /* 1 only while the previous byte was a CR */
+    int mode = 0;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s encoding flags\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    no_enc = mbfl_name2no_encoding(argv[1]);
+    if (no_enc == mbfl_no_encoding_invalid) {
+        printf("Unsupported encoding: %s\n", argv[1]);
+        return EXIT_FAILURE;
+    }
+
+    {
+        const char *p;
+
+        for (p = argv[2]; *p != '\0'; p++) { /* flags are OR-ed, order is irrelevant */
+            switch (*p) {
+            case 'A':
+                mode |= 0x1;
+                break;
+            case 'a':
+                mode |= 0x10;
+                break;
+            case 'R':
+                mode |= 0x2;
+                break;
+            case 'r':
+                mode |= 0x20;
+                break;
+            case 'N':
+                mode |= 0x4;
+                break;
+            case 'n':
+                mode |= 0x40;
+                break;
+            case 'S':
+                mode |= 0x8;
+                break;
+            case 's':
+                mode |= 0x80;
+                break;
+            case 'K':
+                mode |= 0x100;
+                break;
+            case 'k':
+                mode |= 0x1000;
+                break;
+            case 'H':
+                mode |= 0x200;
+                break;
+            case 'h':
+                mode |= 0x2000;
+                break;
+            case 'V':
+                mode |= 0x800;
+                break;
+            case 'C':
+                mode |= 0x10000;
+                break;
+            case 'c':
+                mode |= 0x20000;
+                break;
+            case 'M':
+                mode |= 0x100000;
+                break;
+            case 'm':
+                mode |= 0x200000;
+                break;
+            default: /* unknown flag letters are ignored */
+                break;
+            }
+        }
+    }
+
+    do {
+        mbfl_memory_device_init(&dev, 0, 4096);
+        mbfl_string_init_set(&string, no_lang, no_enc);
+
+        for (;;) {
+            const int c = fgetc(stdin);
+
+            if (c == EOF) {
+                final = 1;
+                break;
+            } else if (c == 10) {
+                if (state == 1) {
+                    state = 0; /* LF of a CR LF pair: line already emitted */
+                    continue;
+                }
+                break;
+            } else if (c == 13) {
+                state = 1;
+                break;
+            }
+            state = 0; /* a CR not directly followed by LF must not eat a later LF */
+            if (dev.pos >= dev.length) {
+                if (dev.length + dev.allocsz < dev.length) {
+                    printf("Unable to allocate memory\n");
+                    return EXIT_FAILURE;
+                }
+                mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz, dev.allocsz);
+            }
+            dev.buffer[dev.pos++] = (unsigned char)c;
+        }
+
+        mbfl_memory_device_result(&dev, &string);
+        mbfl_ja_jp_hantozen(&string, &result, mode);
+        hexdump(&result);
+        mbfl_string_clear(&result);
+        mbfl_string_clear(&string);
+    } while (!final);
+    return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp
new file mode 100644
index 0000000000..a6459fe5ac
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp
@@ -0,0 +1,1098 @@
+#!/usr/bin/expect -f
+
+set timeout 1
+
+set test "full-width alphabets to half-width counterparts"
+spawn tests/conv_kana EUC-JP "r"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width alphabets to full-width counterparts"
+spawn tests/conv_kana EUC-JP "R"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width alphabets to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "Rr"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width numerics to half-width counterparts"
+spawn tests/conv_kana EUC-JP "n"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width numerics to half-width counterparts"
+spawn tests/conv_kana EUC-JP "n"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width numerics to full-width counterparts"
+spawn tests/conv_kana EUC-JP "N"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width numerics to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "nN"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width alphanumerics to half-width counterparts"
+spawn tests/conv_kana EUC-JP "a"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width alphanumerics to full-width counterparts"
+spawn tests/conv_kana EUC-JP "A"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+} ;# ten two-byte digits, starting with %a3%b0
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width alphanumerics to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "Aa"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width spaces to half-width counterparts"
+spawn tests/conv_kana EUC-JP "s"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%20%20 (2)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width spaces to full-width counterparts"
+spawn tests/conv_kana EUC-JP "S"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%a1%a1 (4)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width spaces to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "Ss"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%20%a1%a1 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width katakanas to half-width counterparts"
+spawn tests/conv_kana EUC-JP "k"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%8e%de%a4%ad%8e%de%a4%af%8e%de%a4%b1%8e%de%a4%b3%8e%de%a4%cf%8e%df%a4%d2%8e%df%a4%d5%8e%df%a4%d8%8e%df%a4%db%8e%df (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width counterparts"
+spawn tests/conv_kana EUC-JP "K"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width katakanas to full-width counterparts and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "kK"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%8e%de%a4%ad%8e%de%a4%af%8e%de%a4%b1%8e%de%a4%b3%8e%de%a4%cf%8e%df%a4%d2%8e%df%a4%d5%8e%df%a4%d8%8e%df%a4%db%8e%df (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width hiraganas to half-width katakana"
+spawn tests/conv_kana EUC-JP "h"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%8e%de%a5%ad%8e%de%a5%af%8e%de%a5%b1%8e%de%a5%b3%8e%de%a5%cf%8e%df%a5%d2%8e%df%a5%d5%8e%df%a5%d8%8e%df%a5%db%8e%df (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width hiragana"
+spawn tests/conv_kana EUC-JP "H"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "transliterate half-width katakanas to full-width hiraganas and full-width to half-width at a time"
+spawn tests/conv_kana EUC-JP "hH"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%8e%de%a5%ad%8e%de%a5%af%8e%de%a5%b1%8e%de%a5%b3%8e%de%a5%cf%8e%df%a5%d2%8e%df%a5%d5%8e%df%a5%d8%8e%df%a5%db%8e%df (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test }
+}
+close
+
+set test "full-width katakanas to full-width hiragana"
+spawn tests/conv_kana EUC-JP "c"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "full-width hiraganas to full-width katakana"
+spawn tests/conv_kana EUC-JP "C"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "conversion between full-width hiraganas and full-width katakanas, flags for both directions are specified at a time"
+spawn tests/conv_kana EUC-JP "Cc"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width katakanas, with voiced marks combined"
+spawn tests/conv_kana EUC-JP "KV"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd (30)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+set test "half-width katakanas to full-width hiraganas, with voiced marks combined"
+spawn tests/conv_kana EUC-JP "HV"
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r"
+expect {
+ -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test }
+}
+send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r"
+expect {
+ -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test }
+}
+send "abcdefghijklmnopqrstuvwxyz\r"
+expect {
+ -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test }
+}
+send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r"
+expect {
+ -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test }
+}
+send "0123456789\r"
+expect {
+ -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test }
+}
+send "£°£±£²£³£´£µ£¶£·£¸£¹\r"
+expect {
+ -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test }
+}
+send "¡¡ \r"
+expect {
+ -ex "%a1%a1%20 (3)\r\n" { pass $test }
+}
+send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r"
+expect {
+ -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test }
+}
+send "±²³´µ¶Þ·Þ¸Þ¹ÞºÞÊßËßÌßÍßÎß\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd (30)\r\n" { pass $test }
+}
+send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r"
+expect {
+ -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp
new file mode 100644
index 0000000000..09f518c07b
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp
@@ -0,0 +1,47 @@
+#!/usr/bin/expect -f
+spawn tests/strwidth Japanese UTF-8
+set timeout 1
+
+expect_after {
+ "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+ "8\r\n" { pass $test }
+}
+
+set test "CJK kanji"
+send "æ¼¢å\r"
+expect {
+ "4\r\n" { pass $test }
+}
+
+set test "CJK hiragana"
+send "ã²ãããª\r"
+expect {
+ "8\r\n" { pass $test }
+}
+
+set test "CJK katakana"
+send "ã«ã¿ã«ã\r"
+expect {
+ "8\r\n" { pass $test }
+}
+
+set test "Fullwidth symbols (1)"
+send "ãï¼âï¼ï¼ï¼
ï¼âï¼ï¼ï¼\r"
+expect {
+ "20\r\n" { pass $test }
+}
+
+set test "Halfwidth symbols assumed to be fullwidth in JISX0208 (2)"
+send "âââ\r"
+expect {
+ "3\r\n" { pass $test }
+}
+
+close
+
+# vim: sts=4 sw=4 ts=4 et encoding=UTF-8
diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c
index e7e5c2abe5..c73b9d8f93 100644
--- a/ext/mbstring/mbstring.c
+++ b/ext/mbstring/mbstring.c
@@ -151,9 +151,16 @@ static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8,
+ mbfl_no_encoding_cp1254,
mbfl_no_encoding_8859_9
};
+static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
+ mbfl_no_encoding_ascii,
+ mbfl_no_encoding_utf8,
+ mbfl_no_encoding_koi8u
+};
+
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
mbfl_no_encoding_ascii,
mbfl_no_encoding_utf8
@@ -168,6 +175,7 @@ static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
{ mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
{ mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
{ mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
+ { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
{ mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
};
@@ -360,7 +368,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
ZEND_ARG_INFO(0, strict)
ZEND_END_ARG_INFO()
-ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_list_encodings, 0, 0, 0)
+ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
@@ -1035,75 +1043,72 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
enum mbfl_no_encoding no_encoding;
- const char *enc_name = NULL;
- uint enc_name_len = 0;
-
- no_encoding = new_value ? mbfl_name2no_encoding(new_value):
- mbfl_no_encoding_invalid;
+ const char *enc_name = NULL;
+ uint enc_name_len = 0;
+
+ no_encoding = new_value ? mbfl_name2no_encoding(new_value):
+ mbfl_no_encoding_invalid;
if (no_encoding != mbfl_no_encoding_invalid) {
- enc_name = new_value;
- enc_name_len = new_value_length;
- } else {
- switch (MBSTRG(language)) {
- case mbfl_no_language_uni:
- enc_name = "UTF-8";
- enc_name_len = sizeof("UTF-8") - 1;
- break;
- case mbfl_no_language_japanese:
- enc_name = "EUC-JP";
- enc_name_len = sizeof("EUC-JP") - 1;
- break;
- case mbfl_no_language_korean:
- enc_name = "EUC-KR";
- enc_name_len = sizeof("EUC-KR") - 1;
- break;
- case mbfl_no_language_simplified_chinese:
- enc_name = "EUC-CN";
- enc_name_len = sizeof("EUC-CN") - 1;
- break;
- case mbfl_no_language_traditional_chinese:
- enc_name = "EUC-TW";
- enc_name_len = sizeof("EUC-TW") - 1;
- break;
- case mbfl_no_language_russian:
- enc_name = "KOI8-R";
- enc_name_len = sizeof("KOI8-R") - 1;
- break;
- case mbfl_no_language_german:
- enc_name = "ISO-8859-15";
- enc_name_len = sizeof("ISO-8859-15") - 1;
- break;
- case mbfl_no_language_armenian:
- enc_name = "ArmSCII-8";
- enc_name_len = sizeof("ArmSCII-8") - 1;
- break;
- case mbfl_no_language_turkish:
- enc_name = "ISO-8859-9";
- enc_name_len = sizeof("ISO-8859-9") - 1;
- break;
- default:
- enc_name = "ISO-8859-1";
- enc_name_len = sizeof("ISO-8859-1") - 1;
- break;
- }
- no_encoding = mbfl_name2no_encoding(enc_name);
- }
- MBSTRG(internal_encoding) = no_encoding;
- MBSTRG(current_internal_encoding) = no_encoding;
+ enc_name = new_value;
+ enc_name_len = new_value_length;
+ } else {
+ switch (MBSTRG(language)) {
+ case mbfl_no_language_uni:
+ enc_name = "UTF-8";
+ enc_name_len = sizeof("UTF-8") - 1;
+ break;
+ case mbfl_no_language_japanese:
+ enc_name = "EUC-JP";
+ enc_name_len = sizeof("EUC-JP") - 1;
+ break;
+ case mbfl_no_language_korean:
+ enc_name = "EUC-KR";
+ enc_name_len = sizeof("EUC-KR") - 1;
+ break;
+ case mbfl_no_language_simplified_chinese:
+ enc_name = "EUC-CN";
+ enc_name_len = sizeof("EUC-CN") - 1;
+ break;
+ case mbfl_no_language_traditional_chinese:
+ enc_name = "EUC-TW";
+ enc_name_len = sizeof("EUC-TW") - 1;
+ break;
+ case mbfl_no_language_russian:
+ enc_name = "KOI8-R";
+ enc_name_len = sizeof("KOI8-R") - 1;
+ break;
+ case mbfl_no_language_german:
+ enc_name = "ISO-8859-15";
+ enc_name_len = sizeof("ISO-8859-15") - 1;
+ break;
+ case mbfl_no_language_armenian:
+ enc_name = "ArmSCII-8";
+ enc_name_len = sizeof("ArmSCII-8") - 1;
+ break;
+ case mbfl_no_language_turkish:
+ enc_name = "ISO-8859-9";
+ enc_name_len = sizeof("ISO-8859-9") - 1;
+ break;
+ default:
+ enc_name = "ISO-8859-1";
+ enc_name_len = sizeof("ISO-8859-1") - 1;
+ break;
+ }
+ no_encoding = mbfl_name2no_encoding(enc_name);
+ }
+ MBSTRG(internal_encoding) = no_encoding;
+ MBSTRG(current_internal_encoding) = no_encoding;
#if HAVE_MBREGEX
{
- const char *_enc_name = enc_name;
- if (FAILURE == php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC)) {
+ const char *regex_enc_name = enc_name; /* use the resolved name from above: new_value may be NULL or an unknown encoding */
+ if (FAILURE == php_mb_regex_set_default_mbctype(regex_enc_name TSRMLS_CC)) {
/* falls back to EUC-JP if an unknown encoding name is given */
- _enc_name = "EUC-JP";
- php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC);
+ regex_enc_name = "EUC-JP";
+ php_mb_regex_set_default_mbctype(regex_enc_name TSRMLS_CC);
}
- php_mb_regex_set_mbctype(_enc_name TSRMLS_CC);
+ php_mb_regex_set_mbctype(regex_enc_name TSRMLS_CC); /* current regex encoding must follow the same (possibly fallback) name as the default */
}
#endif
-#ifdef ZEND_MULTIBYTE
- zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC);
-#endif /* ZEND_MULTIBYTE */
return SUCCESS;
}
/* }}} */
@@ -1253,28 +1258,22 @@ PHP_INI_BEGIN()
PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
#endif /* ZEND_MULTIBYTE */
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
- STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
- PHP_INI_SYSTEM | PHP_INI_PERDIR,
- OnUpdateLong,
- func_overload,
- zend_mbstring_globals, mbstring_globals)
-
+ STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
+ PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
+
STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
PHP_INI_SYSTEM | PHP_INI_PERDIR,
OnUpdate_mbstring_encoding_translation,
- encoding_translation,
- zend_mbstring_globals, mbstring_globals)
-
+ encoding_translation, zend_mbstring_globals, mbstring_globals)
PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
- "^(text/|application/xhtml\\+xml)",
- PHP_INI_ALL,
- OnUpdate_mbstring_http_output_conv_mimetypes)
+ "^(text/|application/xhtml\\+xml)",
+ PHP_INI_ALL,
+ OnUpdate_mbstring_http_output_conv_mimetypes)
STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
PHP_INI_ALL,
OnUpdateLong,
- strict_detection,
- zend_mbstring_globals, mbstring_globals)
+ strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
/* }}} */
@@ -1333,11 +1332,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring)
if (mbstring_globals->detect_order_list) {
free(mbstring_globals->detect_order_list);
}
-
if (mbstring_globals->http_output_conv_mimetypes) {
_php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
}
-
#if HAVE_MBREGEX
php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
#endif
@@ -1450,6 +1447,7 @@ PHP_RINIT_FUNCTION(mbstring)
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
#ifdef ZEND_MULTIBYTE
+ zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
php_mb_set_zend_encoding(TSRMLS_C);
#endif /* ZEND_MULTIBYTE */
@@ -1578,7 +1576,7 @@ PHP_FUNCTION(mb_internal_encoding)
#ifdef ZEND_MULTIBYTE
/* TODO: make independent from mbstring.encoding_translation? */
if (MBSTRG(encoding_translation)) {
- zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC);
+ zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
#endif /* ZEND_MULTIBYTE */
RETURN_TRUE;
@@ -3071,18 +3069,13 @@ PHP_FUNCTION(mb_detect_encoding)
/* }}} */
/* {{{ proto mixed mb_list_encodings()
- Returns an array of all supported entity encodings or Returns the entity encoding as a string */
+ Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)
{
const mbfl_encoding **encodings;
const mbfl_encoding *encoding;
int i;
- if (ZEND_NUM_ARGS() != 0) {
- RETVAL_FALSE;
- ZEND_WRONG_PARAM_COUNT();
- }
-
array_init(return_value);
i = 0;
encodings = mbfl_get_supported_encodings();
@@ -3319,8 +3312,8 @@ PHP_FUNCTION(mb_convert_variables)
int n, to_enc_len, argc, stack_level, stack_max, elistsz;
enum mbfl_no_encoding *elist;
char *name, *to_enc;
- void *ptmp;
-
+ void *ptmp;
+
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
return;
}
@@ -3485,7 +3478,7 @@ detect_end:
ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
if (ret != NULL) {
if (Z_REFCOUNT_PP(hash_entry) > 1) {
- Z_DELREF_P(*hash_entry);
+ Z_DELREF_PP(hash_entry);
MAKE_STD_ZVAL(*hash_entry);
} else {
zval_dtor(*hash_entry);
@@ -3864,7 +3857,7 @@ PHP_FUNCTION(mb_send_mail)
smart_str *s;
extern void mbfl_memory_device_unput(mbfl_memory_device *device);
char *pp, *ee;
-
+
/* initialize */
mbfl_memory_device_init(&device, 0, 0);
mbfl_string_init(&orig_str);
@@ -4501,8 +4494,7 @@ MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
/* }}} */
/* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
-MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from
- TSRMLS_DC)
+MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
int i;
mbfl_string string, result, *ret = NULL;
@@ -4722,8 +4714,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int
/* }}} */
#ifdef ZEND_MULTIBYTE
-/* {{{ MBSTRING_API int php_mb_set_zend_encoding() */
-MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
+
+/* {{{ php_mb_set_zend_encoding() */
+static int php_mb_set_zend_encoding(TSRMLS_D)
{
/* 'd better use mbfl_memory_device? */
char *name, *list = NULL;
@@ -4763,7 +4756,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
if (MBSTRG(encoding_translation)) {
/* notify internal encoding to Zend Engine */
name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
- zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC);
+ zend_multibyte_set_internal_encoding(name TSRMLS_CC);
}
zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
@@ -4775,7 +4768,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D)
/* {{{ char *php_mb_encoding_detector()
* Interface for Zend Engine
*/
-char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC)
+static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
{
mbfl_string string;
const char *ret;
@@ -4798,7 +4791,7 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
- string.val = (char*)arg_string;
+ string.val = (unsigned char *)arg_string;
string.len = arg_length;
ret = mbfl_identify_encoding_name(&string, elist, size, 0);
if (list != NULL) {
@@ -4813,9 +4806,9 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg
/* }}} */
/* {{{ int php_mb_encoding_converter() */
-int php_mb_encoding_converter(char **to, int *to_length, const char *from,
- int from_length, const char *encoding_to, const char *encoding_from
- TSRMLS_DC)
+static int php_mb_encoding_converter(unsigned char **to, size_t *to_length,
+ const unsigned char *from, size_t from_length,
+ const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
mbfl_string string, result, *ret;
enum mbfl_no_encoding from_encoding, to_encoding;
@@ -4836,7 +4829,7 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from,
mbfl_string_init(&result);
string.no_encoding = from_encoding;
string.no_language = MBSTRG(language);
- string.val = (char*)from;
+ string.val = (unsigned char*)from;
string.len = from_length;
/* initialize converter */
@@ -4865,14 +4858,14 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from,
* returns number of odd (e.g. appears only first byte of multibyte
* character) chars
*/
-int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC)
+static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
{
mbfl_string mb_string;
mbfl_string_init(&mb_string);
mb_string.no_language = MBSTRG(language);
mb_string.no_encoding = mbfl_name2no_encoding(encoding);
- mb_string.val = (char*)string;
+ mb_string.val = (unsigned char *)string;
mb_string.len = length;
if (mb_string.no_encoding == mbfl_no_encoding_invalid) {
diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h
index cd6cc63b3f..c536183538 100644
--- a/ext/mbstring/mbstring.h
+++ b/ext/mbstring/mbstring.h
@@ -217,16 +217,6 @@ struct mb_overload_def {
#define MBSTRG(v) (mbstring_globals.v)
#endif
-#ifdef ZEND_MULTIBYTE
-MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D);
-char* php_mb_encoding_detector(const char *string, int length, char *list
- TSRMLS_DC);
-int php_mb_encoding_converter(char **to, int *to_length, const char *from,
- int from_length, const char *encoding_to, const char *encoding_from
- TSRMLS_DC);
-int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC);
-#endif /* ZEND_MULTIBYTE */
-
#else /* HAVE_MBSTRING */
#define mbstring_module_ptr NULL
diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING
index ed3fa53b25..4d321bb93b 100644
--- a/ext/mbstring/oniguruma/COPYING
+++ b/ext/mbstring/oniguruma/COPYING
@@ -1,4 +1,4 @@
-OniGuruma LICENSE
+Oniguruma LICENSE
-----------------
When this software is partly used or it is distributed with Ruby,
@@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby.
It follows the BSD license in the case of the one except for it.
/*-
- * Copyright (c) 2002-2004 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY
index c648c54551..a1debefa49 100644
--- a/ext/mbstring/oniguruma/HISTORY
+++ b/ext/mbstring/oniguruma/HISTORY
@@ -1,5 +1,457 @@
History
+2007/08/16: Version 4.7.1
+
+2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/07/04: [spec] (thanks K.Takata)
+ ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl)
+2007/07/04: [dist] (thanks K.Takata)
+ fix documents API and API.ja.
+
+2007/06/18: Version 4.7.0
+
+2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux].
+2007/06/18: [bug] (thanks KUBO Takehiro)
+ WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint).
+2007/06/05: [impl] add #ifndef vsnprintf in regint.h.
+2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case
+ in onig_search().
+
+2007/04/12: Version 4.6.2
+
+2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000
+ to 0x4000.
+2007/03/26: [impl] add 'void' to function declarations.
+
+2007/03/06: Version 4.6.1
+
+2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2007/03/06: [bug] add #include <malloc.h> for bcc32.
+ (In bcc32, alloca() is declared in malloc.h.)
+2007/03/06: [impl] remove including version.h of Ruby.
+2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search().
+ ex. /\n\Z/.match("aaaaaaaaaa\n")
+2007/03/02: [impl] move range > start check position in end_buf process.
+
+2007/02/08: Version 4.6.0
+
+2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2007/01/09: [tune] select_opt_exact_info() didn't work for empty info.
+ ex. /.a/ make MAP info instead of EXACT info.
+2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode.
+2006/12/22: [spec] should check too short multibyte char in parse_exp().
+ add USE_PAD_TO_SHORT_BYTE_CHAR.
+ ex. /\x00/ in UTF16 should be error.
+
+2006/11/17: Version 4.5.1
+
+2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/15: [impl] remove CHECK_INTERRUPT.
+2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e
+ should be [:punct:].
+2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER.
+2006/11/07: [bug] (thanks Byte)
+ add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R.
+
+2006/11/06: Version 4.5.0
+
+2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND.
+2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of
+ the string range.
+ add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE.
+
+2006/10/30: Version 4.4.6
+
+2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/10/30: [impl] (thanks K.Takata)
+ add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END.
+2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner)
+ invalid offset value was used in STATE_CHECK_BUFF_INIT().
+
+2006/10/24: Version 4.4.5
+
+2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux].
+2006/10/24: [impl] escape -Wall warning.
+2006/10/24: [tune] (thanks Kornelius Kalnbach)
+ String#scan for long string needs long time compare with
+ old Ruby
+ by initialization time for combination explosion check
+ ex. ("test " * 100_000).scan(/\w*\s?/)
+ change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000.
+ reduce initialization area of state_check_buff.
+2006/10/16: [bug] (thanks Akinori Musha)
+ first argument of rb_warn() should be format string.
+2006/10/10: [impl] add msa.state_check_buff_size initialization
+ in onig_search().
+2006/10/10: [bug] should call onig_st_free_table() in
+ onig_free_shared_cclass_table().
+2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB.
+2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT().
+ make valgrind happy.
+2006/09/22: [impl] convert to ascii for parameter string in
+ onig_error_code_to_str().
+ add enc member into OnigErrorInfo.
+
+2006/09/19: Version 4.4.4
+
+2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/19: [impl] (thanks KOYAMA Tetsuji)
+ HAVE_STDARG_PROTOTYPES was not defined in Mac OS X
+ by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc...
+
+2006/09/15: Version 4.4.3
+
+2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/15: [bug] (thanks Allan Odgaard)
+ out of range access in bm_search_notrev().
+ (p < s)
+
+2006/09/08: Version 4.4.2
+
+2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux].
+2006/09/08: [bug] (thanks K.Takata)
+ out of range access in bm_search_notrev().
+2006/09/04: [spec] (thanks K.Takata)
+ allow look-behind in negative look-behind.
+ ex. /(? (?:a*){n,n}, (?:a+){n,n}
+2006/09/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n}
+ if backreference is not used.
+2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern
+ in combination explosion check.
+
+2006/08/17: Version 4.3.0
+
+2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK.
+ check /(.+)*/, /(\s*foo\s*)*/ etc...
+ [API] add num_comb_exp_check member in regex_t.
+ [dist] change LTVERSION value to "1:0:0" in configure.in.
+2006/08/15: [bug] OP_REPEAT_INC process in match_at().
+ should check repeat-count >= range-upper and
+ range-upper may be infinite.
+
+2006/08/11: Version 4.2.3
+
+2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/10: [impl] remove double call in set_qualifier().
+2006/08/10: [impl] remove by_number member in QualifierNode.
+2006/08/09: [impl] remove a comma at the end of enum ReduceType
+ for escape warning on Mac OS X.
+2006/08/07: [impl] remove warning in regcomp.c.
+2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY.
+
+2006/08/03: Version 4.2.2
+
+2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/08/03: [bug] (thanks Hiroyuki Yamamoto)
+ segmentation fault in regexec(). (POSIX API)
+2006/08/02: [bug] combination of \G in look-ahead/look-behind and other
+ anchors(\A, \z, \Z) cause invalid result.
+ ex. /(?!\G)a\z/.match("ba")
+ start arg. of MATCH_ARG_INIT() should be original
+ arg. of onig_search().
+
+2006/07/31: Version 4.2.1
+
+2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux].
+2006/07/31: [bug] (thanks Kimura Minoru)
+ re-implement bm_search_notrev().
+2006/07/31: [impl] bm_search_notrev() refactoring.
+2006/07/31: [bug] (thanks Kimura Minoru)
+ fix incomplete multibyte string in exact info.
+2006/07/31: [impl] (thanks Seiji Masugata)
+ remove cast in va_init_list() for Intel C Compiler.
+
+2006/07/18: Version 4.2.0
+
+2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/18: [new] (thanks Wolfgang Nadasi-Donner)
+ add back reference with nest level.
+ \k<name+n>, \k<name-n>
+2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX
+ and ONIG_SYN_XXX number literals.
+
+2006/07/03: Version 4.1.2
+
+2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner)
+ allow \G in look-behind.
+ add ANCHOR_BEGIN_POSITION flag in setup_tree().
+2006/06/12: [impl] (thanks matz)
+ fix cast from char* to const char*
+ in onig_snprintf_with_pattern().
+ fix cast from char* to const char*
+ for PopularQStr[] and ReduceQStr[].
+
+2006/05/22: Version 4.1.1
+
+2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/22: [impl] add position string argument to STACK_BASE_CHECK().
+2006/05/22: [bug] (thanks NARUSE, Yui)
+ add STK_NULL_CHECK_END to IS_TO_VOID_TARGET().
+ ex. core dump in
+ /(?<name>\(([^\(\)]++|\g<name>)*+\))/.match('((a))')
+
+2006/05/15: Version 4.1.0
+
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [impl] thread atomic changes for onig_end() and
+ onig_free_node_list().
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/15: [dist] update API, API.ja, FAQ, FAQ.ja.
+2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe()
+ and re_recompile_pattern().
+ add config USE_RECOMPILE_API.
+2006/05/15: [impl] improved thread safe implementation of onig_search()
+ and onig_match().
+
+2006/05/11: Version 4.0.4
+
+2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/11: [bug] (thanks Yuji Kaneda)
+ dead-lock in onig_end().
+2006/05/11: [dist] update index.html.
+
+2006/05/08: Version 4.0.3
+
+2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/05/08: [bug] (thanks Allan Odgaard)
+ Segmentation fault in backward search.
+ ex. /^\t.*$/
+2006/04/18: [dist] update index.html.
+2006/04/05: [dist] update index.html.
+2006/03/24: [dist] update doc/RE, doc/RE.ja.
+
+2006/03/23: Version 4.0.2
+
+2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux].
+2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP
+ and ONIG_OPTION_CAPTURE_GROUP check.
+2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS.
+2006/03/22: [impl] remove USE_NAMED_GROUP condition from
+ ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect().
+2006/03/22: [new] add API onig_noname_group_capture_is_active().
+2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType.
+ add typedef OnigRegexType regex_t
+ unless ONIG_ESCAPE_REGEX_T_COLLISION is defined.
+2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000
+ to 10000. (for docdiff program)
+2006/02/17: [dist] change COPYING year 2005 -> 2006.
+
+2006/02/07: Version 4.0.1
+
+2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table().
+2006/02/03: [ruby] add -m 0644 option to install command in "make 19".
+2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML.
+ change from IS_POSIXLINE() to IS_MULTILINE()
+ for ANCHOR_ANYCHAR_START/_ML decision
+ in optimize_node_left().
+2006/01/26: [dist] update index.html for Oniguruma 2.5.3.
+2006/01/25: [dist] update URL in index.html.
+
+2006/01/24: Version 4.0.0
+
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin].
+2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2006/01/24: [dist] remove warnings from sample/encode.c.
+2006/01/24: [dist] change install description in README(.ja).
+2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS.
+2006/01/24: [dist] --- support shared library ---
+ use GNU libtool/automake.
+ change configure.in and add Makefile.am, sample/Makefile.am.
+ add AUTHORS file.
+2006/01/24: [dist] test programs return exit code -1 when test fails.
+2006/01/24: [bug] (thanks KIMURA Koichi)
+ invalid syntax definition in ONIG_SYNTAX_GREP.
+ ONIG_SYN_OP_BRACE_INTERVAL
+ -> ONIG_SYN_OP_ESC_BRACE_INTERVAL
+2006/01/23: [dist] fix configure.in for onig-config.
+2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS.
+ (U+000d, U+0085, U+2028, U+2029)
+2005/12/29: [dist] change pmatch array size to 25 in testconv.rb.
+2005/12/26: [dist] fix name in test.rb.
+2005/12/26: [dist] update index.html for 2.5.1.
+
+2005/11/29: Version 3.9.1
+
+2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux].
+2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/21: [bug] (thanks Allan Odgaard)
+ utf-8 character comments in extended mode leads
+ invalid result.
+ ex. /(?x)(?<= # o\n~) /
+ fix onigenc_unicode_is_code_ctype() and
+ utf8_is_code_ctype().
+2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe)
+ begin-line anchor and BM search optimization leads
+ invalid result in UTF-16/32.
+ fix in set_optimize_exact_info().
+
+2005/11/20: Version 3.9.0
+
+2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin].
+2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin].
+2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR.
+ (!!! NO SUPPORT experimental option !!!)
+2005/11/15: [bug] (thanks Allan Odgaard)
+ tok->escape was not cleared in fetch_token_in_cc().
+ ex. [\s&&[^\n]] makes wrong result.
+2005/10/18: [impl] (thanks nobu)
+ change sjis_mbc_enc_len()
+ and node_new_cclass_by_codepoint_range() scope to static.
+2005/09/05: [dist] remove link to MultiFind.
+2005/09/01: [dist] add link to yagrep.
+
+2005/08/23: Version 3.8.9
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [inst] fix Makefile.in for make ctest/ptest.
+
+2005/08/23: Version 3.8.8
+
+2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc().
+2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at().
+2005/08/23: [impl] (thanks akr)
+ add ONIG_OPTION_MAXBIT for escape conflict with
+ Ruby's option.
+2005/08/22: [impl] escape GCC 4.0 warnings for testc.c.
+2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840]
+ UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node().
+ abort on /\S*/ =~ "\xfe"
+2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c.
+2005/08/22: [impl] fix testconvu.rb.
+2005/08/22: [impl] escape GCC 4.0 warnings.
+
+2005/08/09: Version 3.8.7
+
+2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux].
+2005/08/09: [bug] (thanks Allan Odgaard)
+ should not call enc_len() for s == range
+ in onig_search().
+2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install.
+
+2005/07/27: Version 3.8.6
+
+2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux].
+2005/07/27: [impl] update onig-config.in.
+2005/07/26: [new] (thanks Yen-Ju Chen)
+ add Oniguruma configuration check program.
+ (onig-config.in)
+
+2005/07/14: Version 3.8.5
+
+2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux].
+2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux].
+2005/07/11: [bug] (thanks nobu) [ruby-dev:26505]
+ invalid handling for /\c\x/ and /\C-\x/.
+ fix fetch_escaped_value().
+2005/07/05: [impl] (thanks Alexey Zakhlestine)
+ escape GCC 4.0 warnings.
+
+2005/07/01: Version 3.8.4
+
+2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux].
+2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/30: [dist] add GB 18030 test to sample/encode.c.
+2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head().
+2005/06/30: [new] (contributed by KUBO Takehiro)
+ add new character encoding ONIG_ENCODING_GB18030.
+2005/06/30: [bug] invalid ctype check for multibyte encodings.
+ ("graph", "print")
+ fix onigenc_mb2/4_is_code_ctype(),
+ eucjp_is_code_ctype() and sjis_is_code_ctype().
+2005/06/30: [bug] invalid conversion from code point to mbc in
+ onigenc_mb4_code_to_mbc().
+
+2005/06/28: Version 3.8.3
+
+2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux].
+2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner)
+ invalid check for never ending recursion.
+ lower zero quantifier should be treated as
+ a non-recursive call alternative.
+ ex. /(?<name>[^()]*(\(\g<name>\)[^()]*)*)/
+2005/06/15: [impl] add divide_ambig_string_node_sub().
+2005/06/15: [dist] add a test to sample/encode.c.
+2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group)
+
+2005/06/01: Version 3.8.2
+
+2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux].
+2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja.
+2005/05/31: [impl] minor change in node_new().
+2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/30: [bug] (thanks Allan Odgaard)
+ FreeNodeList null check should be on thread-atomic
+ in node_new().
+
+2005/05/11: Version 3.8.1
+
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32].
+2005/05/11: [dist] update win32/Makefile (make 19).
+2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux].
+2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux].
+2005/05/06: [impl] (thanks nobu) [ruby-core:4815]
+ add #ifdef USE_VARIABLE_META_CHARS to goto label.
+2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux].
+2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION
+ to onig_rb_warn() and onig_rb_warning().
+
+2005/04/15: Version 3.8.0
+
+2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux].
+2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/04/01: [impl] (thanks Joe Orton)
+ (thanks Moriyoshi Koizumi)
+ many const-ification to many *.[ch] files.
+
+2005/03/25: Version 3.7.2
+
+2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux].
+2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/23: [new] add ONIG_SYNTAX_ASIS.
+2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE.
+2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API)
+2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux].
+2005/03/08: [impl] (thanks matz) [ruby-dev:25783]
+ should not allocate memory for key data in st.c.
+ move st_*_strend() functions from st.c. fixed some
+ potential memory leaks.
+ (imported from Ruby 1.9 2005-03-08)
+
2005/03/07: Version 3.7.1
2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux].
@@ -24,7 +476,7 @@ History
remove reggnu.c from make 19.
2005/02/19: [dist] update doc/API and doc/API.ja.
2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin].
-2005/02/19: [impl] (thanks Alexey Zakhlestin)
+2005/02/19: [impl] (thanks Alexey Zakhlestine)
change UChar* to const UChar* in oniguruma.h,
regenc.h and regparse.h.
2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and
@@ -1358,16 +1810,29 @@ History
[test: test]
[memo: memo]
--
-
-svn mkdir http://localhost/repos/branches -m ""
-svn mkdir http://localhost/repos/branches/oniguruma -m ""
-svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only"
-
-
-svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX"
-
-
+
cvs history -T
-
+
cvs rtag "VERSION_X_X_X" oniguruma
+
+
+
+* write Makefile.am and configure.in.
+> aclocal
+> libtoolize
+> automake --foreign --add-missing
+> autoconf
+> configure --with-rubydir=... CFLAGS="-O2 -Wall"
+
+
+
+
+ VERSION = current:revision:age
+
+ current: interface number (from 0)
+ revision: implementation number of same interface (from 0)
+ age: number of supported previous interfaces
+ (if current only supported then age == 0)
+
+//END
diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README
index dc4fb3b64b..dff7fba562 100644
--- a/ext/mbstring/oniguruma/README
+++ b/ext/mbstring/oniguruma/README
@@ -1,9 +1,8 @@
-README 2005/02/04
+README 2007/06/18
Oniguruma ---- (C) K.Kosako
http://www.geocities.jp/kosako3/oniguruma/
-http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
Oniguruma is a regular expressions library.
@@ -14,11 +13,12 @@ Supported character encodings:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
- Shift_JIS, Big5, KOI8-R, KOI8 (*),
+ Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+* GB 18030: contributed by KUBO Takehiro
* KOI8 is not included in library archive by default setup.
(need to edit Makefile if you want to use it.)
------------------------------------------------------------
@@ -31,15 +31,20 @@ Install
2. make
3. make install
- library file: libonig.a
+ * uninstall
- test (ASCII/EUC-JP)
+ make uninstall
- make ctest
+ * test (ASCII/EUC-JP)
- uninstall
+ make atest
- make uninstall
+ * configuration check
+
+ onig-config --cflags
+ onig-config --libs
+ onig-config --prefix
+ onig-config --exec-prefix
@@ -73,8 +78,21 @@ Regular Expressions
Usage
- Include oniguruma.h in your program. (native API)
- See doc/API for native API.
+ Include oniguruma.h in your program. (Oniguruma API)
+ See doc/API for Oniguruma API.
+
+ If you want to disable UChar type (== unsigned char) definition
+ in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then
+ include oniguruma.h.
+
+ If you want to disable regex_t type definition in oniguruma.h,
+ define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h.
+
+ Example of the compiling/linking command line in Unix or Cygwin,
+ (prefix == /usr/local case)
+
+ cc sample.c -L/usr/local/lib -lonig
+
If you want to use static link library(onig_s.lib) in Win32,
add option -DONIG_EXTERN=extern to C compiler.
@@ -83,19 +101,20 @@ Usage
Sample Programs
- sample/simple.c example of the minimum (native API)
+ sample/simple.c example of the minimum (Oniguruma API)
sample/names.c example of the named group callback.
sample/encode.c example of some encodings.
sample/listcap.c example of the capture history.
sample/posix.c POSIX API sample.
sample/sql.c example of the variable meta characters.
(SQL-like pattern matching)
- sample/syntax.c Perl and Java syntax test.
+ sample/syntax.c Perl, Java and ASIS syntax test.
Source Files
oniguruma.h Oniguruma API header file. (public)
+ onig-config.in configuration check program template.
regenc.h character encodings framework header file.
regint.h internal definitions
@@ -125,9 +144,10 @@ Source Files
enc/euc_tw.c EUC-TW encoding.
enc/euc_kr.c EUC-KR, EUC-CN encoding.
enc/sjis.c Shift_JIS encoding.
- enc/big5.c Big5 encoding.
- enc/koi8.c KOI8 encoding.
- enc/koi8_r.c KOI8-R encoding.
+ enc/big5.c Big5 encoding.
+ enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro)
+ enc/koi8.c KOI8 encoding.
+ enc/koi8_r.c KOI8-R encoding.
enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1)
enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2)
enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3)
@@ -159,23 +179,11 @@ Source Files
API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6
+ re_compile_fastmap() is removed.
- + re_recompile_pattern() is added.
+ re_alloc_pattern() is added.
-ToDo
-
- ? ignore case in full code point range of Unicode.
- ? Unicode Property.
- ? ambig-flag Katakana <-> Hiragana.
- ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z)
- ? add ONIG_SYNTAX_ASIS.
- ?? \X (== \PM\pM*)
- ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS.
- ?? variable line separator.
- ?? transmission stopper. (return ONIG_STOP from match_at())
-and I'm thankful to Akinori MUSHA.
+I'm thankful to Akinori MUSHA.
Mail Address: K.Kosako
diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja
index 44553abfef..2dee793cae 100644
--- a/ext/mbstring/oniguruma/README.ja
+++ b/ext/mbstring/oniguruma/README.ja
@@ -1,9 +1,8 @@
-README.ja 2005/02/04
+README.ja 2007/06/18
µ´¼Ö ---- (C) K.Kosako
http://www.geocities.jp/kosako3/oniguruma/
-http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/
http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
µ´¼Ö¤ÏÀµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤Ç¤¢¤ë¡£
@@ -14,11 +13,12 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
- Shift_JIS, Big5, KOI8-R, KOI8 (*),
+ Shift_JIS, Big5, GB 18030, KOI8-R, KOI8,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
+* GB 18030: µ×ÊÝ·òÍλáÄó¶¡
* KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£
(ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È)
------------------------------------------------------------
@@ -31,15 +31,21 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
2. make
3. make install
- ¥é¥¤¥Ö¥é¥ê¥Õ¥¡¥¤¥ë: libonig.a
+ ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë
+
+ make uninstall
ưºî¥Æ¥¹¥È (ASCII/EUC-JP)
- make ctest
+ make atest
- ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë
- make uninstall
+ ¹½À®³Îǧ
+
+ onig-config --cflags
+ onig-config --libs
+ onig-config --prefix
+ onig-config --exec-prefix
@@ -71,8 +77,28 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
»ÈÍÑÊýË¡
- »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Native API¤Î¾ì¹ç)¡£
- Native API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+ »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£
+ Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£
+
+ oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç
+ ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+ ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬
+ ͸ú¤Ë¤Ê¤ë¡£
+
+ oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢
+ ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É
+ ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦
+ ̾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í¸ú¤Ë¤Ê¤ë¡£
+
+ Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎã¡§
+ (prefix¤¬/usr/local¤Î¤È¤)
+ cc sample.c -L/usr/local/lib -lonig
+
+ GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ
+ ¤¤¤ì¤Ð¡¢»ÈÍѤǤ¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£
+ ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î
+ ´Ä¶ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£
+
Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢
¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£
@@ -80,18 +106,19 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
»ÈÍÑÎã¥×¥í¥°¥é¥à
- sample/simple.c ºÇ¾®Îã (native API)
+ sample/simple.c ºÇ¾®Îã (Oniguruma API)
sample/names.c ̾Á°ÉÕ¤¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã
sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã
sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã
sample/posix.c POSIX API»ÈÍÑÎã
sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó)
- sample/syntax.c Perl¤ÈJavaʸˡ¤Î¥Æ¥¹¥È
+ sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È
¥½¡¼¥¹¥Õ¥¡¥¤¥ë
oniguruma.h µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«)
+ onig-config.in onig-config¥×¥í¥°¥é¥à ¥Æ¥ó¥×¥ì¡¼¥È
regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ
regint.h ÆâÉôÀë¸À
@@ -122,6 +149,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
+ enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡)
enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°
enc/iso8859_1.c ISO-8859-1 (Latin-1)
@@ -155,23 +183,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/
Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤
+ re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£
- + re_recompile_pattern() ¤¬Äɲ䵤줿¡£
+ re_alloc_pattern() ¤¬Äɲ䵤줿¡£
-»Ä·ï
-
- ? UnicodeÁ´¥³¡¼¥É¥Ý¥¤¥ó¥ÈÎΰè¤Ç¤ÎÂçʸ»ú¾®Ê¸»ú¾È¹ç
- ? Unicode¥×¥í¥Ñ¥Æ¥£
- ? ambig-flag Katakana <-> Hiragana
- ? ONIG_OPTION_NOTBOS/NOTEOSÄɲà (\A, \z, \Z)
- ? ONIG_SYNTAX_ASISÄɲÃ
- ?? \X (== \PM\pM*)
- ?? ʸˡÍ×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ
- ?? ²þ¹Ôʸ»ú(ʸ»úÎó)¤òÊѹ¹¤Ç¤¤ë
- ?? ¸¡º÷°ÌÃÖ°ÜÆ°Ää»ß±é»»»Ò (match_at()¤«¤éONIG_STOP¤òÊÖ¤¹)
-
-and I'm thankful to Akinori MUSHA.
+I'm thankful to Akinori MUSHA.
-Mail Address: K.Kosako
+¥¢¥É¥ì¥¹: K.Kosako
diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in
index 5ca2056fb3..4a2fc28d82 100644
--- a/ext/mbstring/oniguruma/config.h.in
+++ b/ext/mbstring/oniguruma/config.h.in
@@ -1,69 +1,108 @@
-/* config.h.in. Generated automatically from configure.in by autoheader. */
+/* config.h.in. Generated from configure.in by autoheader. */
-/* Define if using alloca.c. */
-#undef C_ALLOCA
-
-/* Define to empty if the keyword does not work. */
-#undef const
-
-/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
- This function is required for alloca.c support on those systems. */
+/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
+ systems. This function is required for `alloca.c' support on those systems.
+ */
#undef CRAY_STACKSEG_END
-/* Define if you have alloca, as a function or macro. */
+/* Define to 1 if using `alloca.c'. */
+#undef C_ALLOCA
+
+/* Define to 1 if you have `alloca', as a function or macro. */
#undef HAVE_ALLOCA
-/* Define if you have <alloca.h> and it should be used (not on Ultrix). */
+/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
+ */
#undef HAVE_ALLOCA_H
-/* If using the C implementation of alloca, define if you know the
- direction of stack growth for your system; otherwise it will be
- automatically deduced at run-time.
- STACK_DIRECTION > 0 => grows toward higher addresses
- STACK_DIRECTION < 0 => grows toward lower addresses
- STACK_DIRECTION = 0 => direction of growth unknown
- */
-#undef STACK_DIRECTION
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
-/* Define if you have the ANSI C header files. */
-#undef STDC_HEADERS
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
-/* Define if you can safely include both <sys/time.h> and <time.h>. */
-#undef TIME_WITH_SYS_TIME
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
-/* The number of bytes in a int. */
-#undef SIZEOF_INT
+/* Define if compiler supports prototypes */
+#undef HAVE_PROTOTYPES
-/* The number of bytes in a long. */
-#undef SIZEOF_LONG
+/* Define if compiler supports stdarg prototypes */
+#undef HAVE_STDARG_PROTOTYPES
-/* The number of bytes in a short. */
-#undef SIZEOF_SHORT
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
-/* Define if you have the <stdlib.h> header file. */
+/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
-/* Define if you have the <string.h> header file. */
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
-/* Define if you have the <strings.h> header file. */
-#undef HAVE_STRINGS_H
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
-/* Define if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
+/* Define to 1 if you have the <sys/times.h> header file. */
+#undef HAVE_SYS_TIMES_H
-/* Define if you have the <sys/time.h> header file. */
+/* Define to 1 if you have the <sys/time.h> header file. */
#undef HAVE_SYS_TIME_H
-/* Define if you have the <sys/times.h> header file. */
-#undef HAVE_SYS_TIMES_H
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
-/* Define if you have the <unistd.h> header file. */
+/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
-/* Define if you have the function argument prototype */
-#undef HAVE_PROTOTYPES
+/* Name of package */
+#undef PACKAGE
-/* Define if you have the variable length prototypes and stdarg.h */
-#undef HAVE_STDARG_PROTOTYPES
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* The size of a `int', as computed by sizeof. */
+#undef SIZEOF_INT
+
+/* The size of a `long', as computed by sizeof. */
+#undef SIZEOF_LONG
+
+/* The size of a `short', as computed by sizeof. */
+#undef SIZEOF_SHORT
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define to enable the combination explosion check */
+#undef USE_COMBINATION_EXPLOSION_CHECK
+
+/* Version number of package */
+#undef VERSION
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c
index 763872e963..86792666a4 100644
--- a/ext/mbstring/oniguruma/enc/big5.c
+++ b/ext/mbstring/oniguruma/enc/big5.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_BIG5[] = {
+static const int EncLen_BIG5[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c
index 5f13e33eb4..71c81ee9fe 100644
--- a/ext/mbstring/oniguruma/enc/euc_jp.c
+++ b/ext/mbstring/oniguruma/enc/euc_jp.c
@@ -31,7 +31,7 @@
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
-static int EncLen_EUCJP[] = {
+static const int EncLen_EUCJP[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -158,20 +158,16 @@ eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
static int
eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (eucjp_code_to_mbclen(code) > 1 ? TRUE : FALSE);
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
+ }
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
static UChar*
diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c
index c1e83b7e66..57bf801536 100644
--- a/ext/mbstring/oniguruma/enc/euc_kr.c
+++ b/ext/mbstring/oniguruma/enc/euc_kr.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_EUCKR[] = {
+static const int EncLen_EUCKR[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c
index 4e5851a451..6f396e75e6 100644
--- a/ext/mbstring/oniguruma/enc/euc_tw.c
+++ b/ext/mbstring/oniguruma/enc/euc_tw.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_EUCTW[] = {
+static const int EncLen_EUCTW[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c
index 53ad52ee13..5646f26c10 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_1.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_1.c
@@ -2,7 +2,7 @@
iso8859_1.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,23 +32,23 @@
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_1_CtypeTable[256] = {
+static const unsigned short EncISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -72,16 +72,6 @@ iso_8859_1_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* e
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -101,22 +91,6 @@ iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag,
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -153,8 +127,7 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c
index a9331cebf3..8081ef8010 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_10.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_10.c
@@ -2,7 +2,7 @@
iso8859_10.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \
((EncISO_8859_10_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_10_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_10_CtypeTable[256] = {
+static const unsigned short EncISO_8859_10_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_10_mbc_to_normalize(OnigAmbigType flag,
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag,
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa2, 0xb2 },
{ 0xa3, 0xb3 },
@@ -302,8 +276,7 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c
index bb1098807a..de9bb3b825 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_11.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_11.c
@@ -32,23 +32,23 @@
#define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \
((EncISO_8859_11_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_11_CtypeTable[256] = {
+static const unsigned short EncISO_8859_11_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c
index 827ca508e8..69316edfc3 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_13.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_13.c
@@ -2,7 +2,7 @@
iso8859_13.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2007 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \
((EncISO_8859_13_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_13_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_13_CtypeTable[256] = {
+static const unsigned short EncISO_8859_13_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_13_CtypeTable[256] = {
};
static int
-iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_13_mbc_to_normalize(OnigAmbigType flag,
}
static int
-iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag,
}
static int
-iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_13_CTYPE(code, ctype);
@@ -185,74 +158,73 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype)
}
static int
-iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
+ static const OnigPairAmbigCodes cc[] = {
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe },
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
- { 0xe0, 0xc0 },
- { 0xe1, 0xc1 },
- { 0xe2, 0xc2 },
- { 0xe3, 0xc3 },
- { 0xe4, 0xc4 },
- { 0xe5, 0xc5 },
- { 0xe6, 0xc6 },
- { 0xe7, 0xc7 },
- { 0xe8, 0xc8 },
- { 0xe9, 0xc9 },
- { 0xea, 0xca },
- { 0xeb, 0xcb },
- { 0xec, 0xcc },
- { 0xed, 0xcd },
- { 0xee, 0xce },
- { 0xef, 0xcf },
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
- { 0xf0, 0xd0 },
- { 0xf1, 0xd1 },
- { 0xf2, 0xd2 },
- { 0xf3, 0xd3 },
- { 0xf4, 0xd4 },
- { 0xf5, 0xd5 },
- { 0xf6, 0xd6 },
- { 0xf8, 0xd8 },
- { 0xf9, 0xd9 },
- { 0xfa, 0xda },
- { 0xfb, 0xdb },
- { 0xfc, 0xdc },
- { 0xfd, 0xdd },
- { 0xfe, 0xde }
- };
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde }
+ };
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
*ccs = OnigAsciiPairAmbigCodes;
@@ -272,8 +244,7 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
@@ -286,11 +257,11 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_13_mbc_to_normalize,
- iso_8859_13_is_mbc_ambiguous,
- iso_8859_13_get_all_pair_ambig_codes,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
- iso_8859_13_is_code_ctype,
+ is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c
index 4fe5ab29d1..44638cf13a 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_14.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_14.c
@@ -2,7 +2,7 @@
iso8859_14.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2007 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \
((EncISO_8859_14_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_14_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_14_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_14_CtypeTable[256] = {
+static const unsigned short EncISO_8859_14_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_14_CtypeTable[256] = {
};
static int
-iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_14_mbc_to_normalize(OnigAmbigType flag,
}
static int
-iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag,
}
static int
-iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_14_CTYPE(code, ctype);
@@ -185,103 +158,102 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype)
}
static int
-iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
- { 0xa1, 0xa2 },
- { 0xa2, 0xa1 },
- { 0xa4, 0xa5 },
- { 0xa5, 0xa4 },
- { 0xa6, 0xab },
- { 0xa8, 0xb8 },
- { 0xaa, 0xba },
- { 0xab, 0xa6 },
- { 0xac, 0xbc },
- { 0xaf, 0xff },
+ static const OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xa2 },
+ { 0xa2, 0xa1 },
+ { 0xa4, 0xa5 },
+ { 0xa5, 0xa4 },
+ { 0xa6, 0xab },
+ { 0xa8, 0xb8 },
+ { 0xaa, 0xba },
+ { 0xab, 0xa6 },
+ { 0xac, 0xbc },
+ { 0xaf, 0xff },
- { 0xb0, 0xb1 },
- { 0xb1, 0xb0 },
- { 0xb2, 0xb3 },
- { 0xb3, 0xb2 },
- { 0xb4, 0xb5 },
- { 0xb5, 0xb4 },
- { 0xb7, 0xb9 },
- { 0xb8, 0xa8 },
- { 0xb9, 0xb7 },
- { 0xba, 0xaa },
- { 0xbb, 0xbf },
- { 0xbc, 0xac },
- { 0xbd, 0xbe },
- { 0xbe, 0xbd },
- { 0xbf, 0xbb },
+ { 0xb0, 0xb1 },
+ { 0xb1, 0xb0 },
+ { 0xb2, 0xb3 },
+ { 0xb3, 0xb2 },
+ { 0xb4, 0xb5 },
+ { 0xb5, 0xb4 },
+ { 0xb7, 0xb9 },
+ { 0xb8, 0xa8 },
+ { 0xb9, 0xb7 },
+ { 0xba, 0xaa },
+ { 0xbb, 0xbf },
+ { 0xbc, 0xac },
+ { 0xbd, 0xbe },
+ { 0xbe, 0xbd },
+ { 0xbf, 0xbb },
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd7, 0xf7 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe },
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
- { 0xe0, 0xc0 },
- { 0xe1, 0xc1 },
- { 0xe2, 0xc2 },
- { 0xe3, 0xc3 },
- { 0xe4, 0xc4 },
- { 0xe5, 0xc5 },
- { 0xe6, 0xc6 },
- { 0xe7, 0xc7 },
- { 0xe8, 0xc8 },
- { 0xe9, 0xc9 },
- { 0xea, 0xca },
- { 0xeb, 0xcb },
- { 0xec, 0xcc },
- { 0xed, 0xcd },
- { 0xee, 0xce },
- { 0xef, 0xcf },
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
- { 0xf0, 0xd0 },
- { 0xf1, 0xd1 },
- { 0xf2, 0xd2 },
- { 0xf3, 0xd3 },
- { 0xf4, 0xd4 },
- { 0xf5, 0xd5 },
- { 0xf6, 0xd6 },
- { 0xf7, 0xd7 },
- { 0xf8, 0xd8 },
- { 0xf9, 0xd9 },
- { 0xfa, 0xda },
- { 0xfb, 0xdb },
- { 0xfc, 0xdc },
- { 0xfd, 0xdd },
- { 0xfe, 0xde },
- { 0xff, 0xaf }
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde },
+ { 0xff, 0xaf }
};
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
@@ -302,8 +274,7 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
@@ -316,11 +287,11 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_14_mbc_to_normalize,
- iso_8859_14_is_mbc_ambiguous,
- iso_8859_14_get_all_pair_ambig_codes,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
- iso_8859_14_is_code_ctype,
+ is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c
index 1a8bd7b4c5..f643b895df 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_15.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_15.c
@@ -2,7 +2,7 @@
iso8859_15.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2007 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \
((EncISO_8859_15_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_15_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_15_CtypeTable[256] = {
+static const unsigned short EncISO_8859_15_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_15_CtypeTable[256] = {
};
static int
-iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_15_mbc_to_normalize(OnigAmbigType flag,
}
static int
-iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag,
}
static int
-iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_15_CTYPE(code, ctype);
@@ -185,10 +158,10 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype)
}
static int
-iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag,
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa6, 0xa8 },
{ 0xa8, 0xa6 },
@@ -282,8 +255,7 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
@@ -296,11 +268,11 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_15_mbc_to_normalize,
- iso_8859_15_is_mbc_ambiguous,
- iso_8859_15_get_all_pair_ambig_codes,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
- iso_8859_15_is_code_ctype,
+ is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c
index e283db17cc..921ae36d9d 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_16.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_16.c
@@ -2,7 +2,7 @@
iso8859_16.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2007 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \
((EncISO_8859_16_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_16_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_16_CtypeTable[256] = {
+static const unsigned short EncISO_8859_16_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -104,21 +104,11 @@ static unsigned short EncISO_8859_16_CtypeTable[256] = {
};
static int
-iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+mbc_to_normalize(OnigAmbigType flag,
+ const UChar** pp, const UChar* end, UChar* lower)
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -133,27 +123,10 @@ iso_8859_16_mbc_to_normalize(OnigAmbigType flag,
}
static int
-iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -176,7 +149,7 @@ iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag,
}
static int
-iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
if (code < 256)
return ENC_IS_ISO_8859_16_CTYPE(code, ctype);
@@ -185,97 +158,96 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype)
}
static int
-iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
- { 0xa1, 0xa2 },
- { 0xa2, 0xa1 },
- { 0xa3, 0xb3 },
- { 0xa6, 0xa8 },
- { 0xa8, 0xa6 },
- { 0xaa, 0xba },
- { 0xac, 0xae },
- { 0xae, 0xac },
- { 0xaf, 0xbf },
+ static const OnigPairAmbigCodes cc[] = {
+ { 0xa1, 0xa2 },
+ { 0xa2, 0xa1 },
+ { 0xa3, 0xb3 },
+ { 0xa6, 0xa8 },
+ { 0xa8, 0xa6 },
+ { 0xaa, 0xba },
+ { 0xac, 0xae },
+ { 0xae, 0xac },
+ { 0xaf, 0xbf },
- { 0xb2, 0xb9 },
- { 0xb3, 0xa3 },
- { 0xb4, 0xb8 },
- { 0xb8, 0xb4 },
- { 0xb9, 0xb2 },
- { 0xba, 0xaa },
- { 0xbc, 0xbd },
- { 0xbd, 0xbc },
- { 0xbe, 0xff },
- { 0xbf, 0xaf },
+ { 0xb2, 0xb9 },
+ { 0xb3, 0xa3 },
+ { 0xb4, 0xb8 },
+ { 0xb8, 0xb4 },
+ { 0xb9, 0xb2 },
+ { 0xba, 0xaa },
+ { 0xbc, 0xbd },
+ { 0xbd, 0xbc },
+ { 0xbe, 0xff },
+ { 0xbf, 0xaf },
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd7, 0xf7 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe },
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe },
- { 0xe0, 0xc0 },
- { 0xe1, 0xc1 },
- { 0xe2, 0xc2 },
- { 0xe3, 0xc3 },
- { 0xe4, 0xc4 },
- { 0xe5, 0xc5 },
- { 0xe6, 0xc6 },
- { 0xe7, 0xc7 },
- { 0xe8, 0xc8 },
- { 0xe9, 0xc9 },
- { 0xea, 0xca },
- { 0xeb, 0xcb },
- { 0xec, 0xcc },
- { 0xed, 0xcd },
- { 0xee, 0xce },
- { 0xef, 0xcf },
+ { 0xe0, 0xc0 },
+ { 0xe1, 0xc1 },
+ { 0xe2, 0xc2 },
+ { 0xe3, 0xc3 },
+ { 0xe4, 0xc4 },
+ { 0xe5, 0xc5 },
+ { 0xe6, 0xc6 },
+ { 0xe7, 0xc7 },
+ { 0xe8, 0xc8 },
+ { 0xe9, 0xc9 },
+ { 0xea, 0xca },
+ { 0xeb, 0xcb },
+ { 0xec, 0xcc },
+ { 0xed, 0xcd },
+ { 0xee, 0xce },
+ { 0xef, 0xcf },
- { 0xf0, 0xd0 },
- { 0xf1, 0xd1 },
- { 0xf2, 0xd2 },
- { 0xf3, 0xd3 },
- { 0xf4, 0xd4 },
- { 0xf5, 0xd5 },
- { 0xf6, 0xd6 },
- { 0xf7, 0xd7 },
- { 0xf8, 0xd8 },
- { 0xf9, 0xd9 },
- { 0xfa, 0xda },
- { 0xfb, 0xdb },
- { 0xfc, 0xdc },
- { 0xfd, 0xdd },
- { 0xfe, 0xde },
- { 0xff, 0xbe }
+ { 0xf0, 0xd0 },
+ { 0xf1, 0xd1 },
+ { 0xf2, 0xd2 },
+ { 0xf3, 0xd3 },
+ { 0xf4, 0xd4 },
+ { 0xf5, 0xd5 },
+ { 0xf6, 0xd6 },
+ { 0xf7, 0xd7 },
+ { 0xf8, 0xd8 },
+ { 0xf9, 0xd9 },
+ { 0xfa, 0xda },
+ { 0xfb, 0xdb },
+ { 0xfc, 0xdc },
+ { 0xfd, 0xdd },
+ { 0xfe, 0xde },
+ { 0xff, 0xbe }
};
if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) {
@@ -296,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
@@ -310,11 +281,11 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
onigenc_single_byte_mbc_to_code,
onigenc_single_byte_code_to_mbclen,
onigenc_single_byte_code_to_mbc,
- iso_8859_16_mbc_to_normalize,
- iso_8859_16_is_mbc_ambiguous,
- iso_8859_16_get_all_pair_ambig_codes,
+ mbc_to_normalize,
+ is_mbc_ambiguous,
+ get_all_pair_ambig_codes,
onigenc_ess_tsett_get_all_comp_ambig_codes,
- iso_8859_16_is_code_ctype,
+ is_code_ctype,
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match
diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c
index e86415b9c9..f8cb3756f2 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_2.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_2.c
@@ -2,7 +2,7 @@
iso8859_2.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
((EncISO_8859_2_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_2_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_2_CtypeTable[256] = {
+static const unsigned short EncISO_8859_2_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_2_mbc_to_normalize(OnigAmbigType flag,
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -177,9 +151,9 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag,
static int
iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
@@ -294,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c
index 76d2bec8a8..e62d20de7b 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_3.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_3.c
@@ -2,7 +2,7 @@
iso8859_3.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \
((EncISO_8859_3_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_3_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_3_CtypeTable[256] = {
+static const unsigned short EncISO_8859_3_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_3_mbc_to_normalize(OnigAmbigType flag,
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag,
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa6, 0xb6 },
{ 0xa9, 0xb9 },
@@ -283,8 +257,7 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c
index 7569006725..dd6bd7dfe3 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_4.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_4.c
@@ -2,7 +2,7 @@
iso8859_4.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \
((EncISO_8859_4_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_4_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_4_CtypeTable[256] = {
+static const unsigned short EncISO_8859_4_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_4_mbc_to_normalize(OnigAmbigType flag,
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag,
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
{ 0xa5, 0xb5 },
@@ -292,8 +266,7 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c
index 2f7677b3e7..87b7fb8a29 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_5.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_5.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \
((EncISO_8859_5_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_5_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_5_CtypeTable[256] = {
+static const unsigned short EncISO_8859_5_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -152,9 +152,9 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xa1, 0xf1 },
{ 0xa2, 0xf2 },
{ 0xa3, 0xf3 },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c
index 0fcb9e8b83..fffcd0e7d1 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_6.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_6.c
@@ -32,23 +32,23 @@
#define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \
((EncISO_8859_6_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_6_CtypeTable[256] = {
+static const unsigned short EncISO_8859_6_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c
index 8b2cb9ec59..e87661d84b 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_7.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_7.c
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \
((EncISO_8859_7_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_7_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_7_CtypeTable[256] = {
+static const unsigned short EncISO_8859_7_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -159,9 +159,9 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xb6, 0xdc },
{ 0xb8, 0xdd },
{ 0xb9, 0xde },
diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c
index 3c95b9b137..e76966c667 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_8.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_8.c
@@ -32,23 +32,23 @@
#define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \
((EncISO_8859_8_CtypeTable[code] & ctype) != 0)
-static unsigned short EncISO_8859_8_CtypeTable[256] = {
+static const unsigned short EncISO_8859_8_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
diff --git a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c
index 1b061ff6ea..16a30c5f24 100644
--- a/ext/mbstring/oniguruma/enc/iso8859_9.c
+++ b/ext/mbstring/oniguruma/enc/iso8859_9.c
@@ -2,7 +2,7 @@
iso8859_9.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \
((EncISO_8859_9_CtypeTable[code] & ctype) != 0)
-static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
+static const UChar EncISO_8859_9_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = {
'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
};
-static unsigned short EncISO_8859_9_CtypeTable[256] = {
+static const unsigned short EncISO_8859_9_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -109,16 +109,6 @@ iso_8859_9_mbc_to_normalize(OnigAmbigType flag,
{
const UChar* p = *pp;
- if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- *lower = 0xdf;
- (*pp) += 2;
- return 1;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -138,22 +128,6 @@ iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag,
{
const UChar* p = *pp;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 1) {
- if ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S'))) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
- if (*p == 0xdf) {
- (*pp)++;
- return TRUE;
- }
- }
-
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -186,9 +160,9 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
@@ -272,8 +246,7 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
1, /* max enc length */
1, /* min enc length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c
index f8a5a1da61..d7277e862e 100644
--- a/ext/mbstring/oniguruma/enc/koi8.c
+++ b/ext/mbstring/oniguruma/enc/koi8.c
@@ -33,7 +33,7 @@
#define ENC_IS_KOI8_CTYPE(code,ctype) \
((EncKOI8_CtypeTable[code] & ctype) != 0)
-static UChar EncKOI8_ToLowerCaseTable[256] = {
+static const UChar EncKOI8_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncKOI8_ToLowerCaseTable[256] = {
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
-static unsigned short EncKOI8_CtypeTable[256] = {
+static const unsigned short EncKOI8_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008,
@@ -105,9 +105,9 @@ static unsigned short EncKOI8_CtypeTable[256] = {
static int
koi8_mbc_to_normalize(OnigAmbigType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+ const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower)
{
- UChar* p = (UChar *)*pp;
+ const OnigUChar* p = *pp;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -123,9 +123,9 @@ koi8_mbc_to_normalize(OnigAmbigType flag,
}
static int
-koi8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
+koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
{
- UChar* p = (UChar *)*pp;
+ const OnigUChar* p = *pp;
(*pp)++;
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
@@ -151,9 +151,9 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
koi8_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c
index 7c626df616..1010f5ff93 100644
--- a/ext/mbstring/oniguruma/enc/koi8_r.c
+++ b/ext/mbstring/oniguruma/enc/koi8_r.c
@@ -2,7 +2,7 @@
koi8_r.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,7 @@
#define ENC_IS_KOI8_R_CTYPE(code,ctype) \
((EncKOI8_R_CtypeTable[code] & ctype) != 0)
-static UChar EncKOI8_R_ToLowerCaseTable[256] = {
+static const UChar EncKOI8_R_ToLowerCaseTable[256] = {
'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
@@ -68,23 +68,23 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = {
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337'
};
-static unsigned short EncKOI8_R_CtypeTable[256] = {
+static const unsigned short EncKOI8_R_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
- 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0,
- 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
+ 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0,
- 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0,
+ 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0,
0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2,
- 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0,
- 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
+ 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0,
+ 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2,
- 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008,
+ 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0,
@@ -150,9 +150,12 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
static int
koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag,
- OnigPairAmbigCodes** ccs)
+ const OnigPairAmbigCodes** ccs)
{
- static OnigPairAmbigCodes cc[] = {
+ static const OnigPairAmbigCodes cc[] = {
+ { 0xa3, 0xb3 },
+ { 0xb3, 0xa3 },
+
{ 0xc0, 0xe0 },
{ 0xc1, 0xe1 },
{ 0xc2, 0xe2 },
diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c
index 6b9ef4c5b5..fcf057423c 100644
--- a/ext/mbstring/oniguruma/enc/mktable.c
+++ b/ext/mbstring/oniguruma/enc/mktable.c
@@ -2,7 +2,7 @@
mktable.c
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
* SUCH DAMAGE.
*/
+#include <stdlib.h>
#include <stdio.h>
#define NOT_RUBY
@@ -614,15 +615,10 @@ static int IsPunct(int enc, int c)
if (c >= 0x3c && c <= 0x3e) return 1;
}
- if (c >= 0x21 && c <= 0x23) return 1;
- if (c >= 0x25 && c <= 0x2a) return 1;
- if (c >= 0x2c && c <= 0x2f) return 1;
- if (c >= 0x3a && c <= 0x3b) return 1;
- if (c >= 0x3f && c <= 0x40) return 1;
- if (c >= 0x5b && c <= 0x5d) return 1;
- if (c == 0x5f) return 1;
- if (c == 0x7b) return 1;
- if (c == 0x7d) return 1;
+ if (c >= 0x21 && c <= 0x2f) return 1;
+ if (c >= 0x3a && c <= 0x40) return 1;
+ if (c >= 0x5b && c <= 0x60) return 1;
+ if (c >= 0x7b && c <= 0x7e) return 1;
switch (enc) {
case ISO_8859_1:
diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c
index e13407bccf..f7d7d52265 100644
--- a/ext/mbstring/oniguruma/enc/sjis.c
+++ b/ext/mbstring/oniguruma/enc/sjis.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-static int EncLen_SJIS[] = {
+static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -76,7 +76,7 @@ sjis_mbc_enc_len(const UChar* p)
return EncLen_SJIS[*p];
}
-extern int
+static int
sjis_code_to_mbclen(OnigCodePoint code)
{
if (code < 256) {
@@ -167,21 +167,16 @@ sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
static int
sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
+ if (code < 128)
+ return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
+ else {
+ if ((ctype & (ONIGENC_CTYPE_WORD |
+ ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) {
return (sjis_code_to_mbclen(code) > 1 ? TRUE : FALSE);
}
-
- ctype &= ~ONIGENC_CTYPE_WORD;
- if (ctype == 0) return FALSE;
}
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else
- return FALSE;
+ return FALSE;
}
static UChar*
diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c
index e3be9450a5..a8cf539014 100644
--- a/ext/mbstring/oniguruma/enc/unicode.c
+++ b/ext/mbstring/oniguruma/enc/unicode.c
@@ -30,7 +30,7 @@
#include "regenc.h"
-unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
+const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008,
0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008,
@@ -65,7 +65,7 @@ unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = {
0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2
};
-static OnigCodePoint CRAlnum[] = {
+static const OnigCodePoint CRAlnum[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
414,
#else
@@ -490,7 +490,7 @@ static OnigCodePoint CRAlnum[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRAlnum */
-static OnigCodePoint CRAlpha[] = {
+static const OnigCodePoint CRAlpha[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
396,
#else
@@ -897,7 +897,7 @@ static OnigCodePoint CRAlpha[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRAlpha */
-static OnigCodePoint CRBlank[] = {
+static const OnigCodePoint CRBlank[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
9,
#else
@@ -917,7 +917,7 @@ static OnigCodePoint CRBlank[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRBlank */
-static OnigCodePoint CRCntrl[] = {
+static const OnigCodePoint CRCntrl[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
19,
#else
@@ -947,7 +947,7 @@ static OnigCodePoint CRCntrl[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRCntrl */
-static OnigCodePoint CRDigit[] = {
+static const OnigCodePoint CRDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
23,
#else
@@ -981,7 +981,7 @@ static OnigCodePoint CRDigit[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRDigit */
-static OnigCodePoint CRGraph[] = {
+static const OnigCodePoint CRGraph[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
405,
#else
@@ -1397,7 +1397,7 @@ static OnigCodePoint CRGraph[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRGraph */
-static OnigCodePoint CRLower[] = {
+static const OnigCodePoint CRLower[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
424,
#else
@@ -1832,7 +1832,7 @@ static OnigCodePoint CRLower[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRLower */
-static OnigCodePoint CRPrint[] = {
+static const OnigCodePoint CRPrint[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
405,
#else
@@ -2248,7 +2248,7 @@ static OnigCodePoint CRPrint[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRPrint */
-static OnigCodePoint CRPunct[] = {
+static const OnigCodePoint CRPunct[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
86,
#else
@@ -2345,7 +2345,7 @@ static OnigCodePoint CRPunct[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRPunct */
-static OnigCodePoint CRSpace[] = {
+static const OnigCodePoint CRSpace[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
11,
#else
@@ -2367,7 +2367,7 @@ static OnigCodePoint CRSpace[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRSpace */
-static OnigCodePoint CRUpper[] = {
+static const OnigCodePoint CRUpper[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
421,
#else
@@ -2799,7 +2799,7 @@ static OnigCodePoint CRUpper[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of CRUpper */
-static OnigCodePoint CRXDigit[] = {
+static const OnigCodePoint CRXDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
3,
#else
@@ -2810,7 +2810,7 @@ static OnigCodePoint CRXDigit[] = {
0x0061, 0x0066
};
-static OnigCodePoint CRASCII[] = {
+static const OnigCodePoint CRASCII[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
1,
#else
@@ -2819,7 +2819,7 @@ static OnigCodePoint CRASCII[] = {
0x0000, 0x007f
};
-static OnigCodePoint CRWord[] = {
+static const OnigCodePoint CRWord[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
436,
#else
@@ -3320,6 +3320,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
case ONIGENC_CTYPE_ALNUM:
return onig_is_in_code_range((UChar* )CRAlnum, code);
break;
+ case ONIGENC_CTYPE_NEWLINE:
+ return FALSE;
+ break;
default:
return ONIGENCERR_TYPE_BUG;
@@ -3337,9 +3340,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
extern int
onigenc_unicode_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
- static OnigCodePoint EmptyRange[] = { 0 };
+ static const OnigCodePoint EmptyRange[] = { 0 };
#define CR_SET(list) do { \
*mbr = list; \
diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c
index ad33ddbeeb..6ab80a6c1c 100755
--- a/ext/mbstring/oniguruma/enc/utf16_be.c
+++ b/ext/mbstring/oniguruma/enc/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
-static int EncLen_UTF16[] = {
+static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -63,6 +63,12 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 1 < end) {
if (*(p+1) == 0x0a && *p == 0x00)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00)
+ return 1;
+ if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28))
+ return 1;
+#endif
}
return 0;
}
@@ -120,18 +126,6 @@ utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
if (*p == 0) {
p++;
- if (end > p + 2 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+2) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+2) == 'S'))) &&
- *(p+1) == 0) {
- *lower++ = '\0';
- *lower = 0xdf;
- (*pp) += 4;
- return 2;
- }
-
*lower++ = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -171,20 +165,6 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
int c, v;
p++;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 2 &&
- ((*p == 's' && *(p+2) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+2) == 'S'))) &&
- *(p+1) == 0) {
- (*pp) += 2;
- return TRUE;
- }
- else if (*p == 0xdf) {
- return TRUE;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -228,8 +208,7 @@ OnigEncodingType OnigEncodingUTF16_BE = {
4, /* max byte length */
2, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c
index db892dcd14..2248e4910f 100755
--- a/ext/mbstring/oniguruma/enc/utf16_le.c
+++ b/ext/mbstring/oniguruma/enc/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
-static int EncLen_UTF16[] = {
+static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -69,6 +69,12 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 1 < end) {
if (*p == 0x0a && *(p+1) == 0x00)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00)
+ return 1;
+ if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28))
+ return 1;
+#endif
}
return 0;
}
@@ -122,18 +128,6 @@ utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
const UChar* p = *pp;
if (*(p+1) == 0) {
- if (end > p + 3 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+2) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+2) == 'S'))) &&
- *(p+3) == 0) {
- *lower++ = 0xdf;
- *lower = '\0';
- (*pp) += 4;
- return 2;
- }
-
*(lower+1) = '\0';
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
@@ -170,17 +164,6 @@ utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
if (*(p+1) == 0) {
int c, v;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 3 &&
- ((*p == 's' && *(p+2) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+2) == 'S'))) &&
- *(p+3) == 0) {
- (*pp) += 2;
- return TRUE;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -223,8 +206,7 @@ OnigEncodingType OnigEncodingUTF16_LE = {
4, /* max byte length */
2, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c
index 60feb040b8..75133ca262 100755
--- a/ext/mbstring/oniguruma/enc/utf32_be.c
+++ b/ext/mbstring/oniguruma/enc/utf32_be.c
@@ -2,7 +2,7 @@
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,14 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 3 < end) {
if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*(p+3) == 0x0d || *(p+3) == 0x85)
+ && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00)
+ return 1;
+ if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28)
+ && *(p+1) == 0 && *p == 0)
+ return 1;
+#endif
}
return 0;
}
@@ -77,20 +85,6 @@ utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
p += 3;
- if (end > p + 4 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+4) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+4) == 'S'))) &&
- *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
- *lower++ = '\0';
- *lower++ = '\0';
- *lower++ = '\0';
- *lower = 0xdf;
- (*pp) += 8;
- return 4;
- }
-
*lower++ = '\0';
*lower++ = '\0';
*lower++ = '\0';
@@ -131,20 +125,6 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
int c, v;
p += 3;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 4 &&
- ((*p == 's' && *(p+4) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+4) == 'S'))) &&
- *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) {
- (*pp) += 4;
- return TRUE;
- }
- else if (*p == 0xdf) {
- return TRUE;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -183,8 +163,7 @@ OnigEncodingType OnigEncodingUTF32_BE = {
4, /* max byte length */
4, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c
index bba9689f76..21dca10c11 100755
--- a/ext/mbstring/oniguruma/enc/utf32_le.c
+++ b/ext/mbstring/oniguruma/enc/utf32_le.c
@@ -2,7 +2,7 @@
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,6 +41,14 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
if (p + 3 < end) {
if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0)
return 1;
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00 /* U+000D, U+0085 */
+ && *(p+2) == 0x00 && *(p+3) == 0x00)
+ return 1;
+ if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) /* U+2029, U+2028 */
+ && *(p+2) == 0x00 && *(p+3) == 0x00)
+ return 1;
+#endif
}
return 0;
}
@@ -76,20 +84,6 @@ utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end,
const UChar* p = *pp;
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
- if (end > p + 7 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+4) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+4) == 'S'))) &&
- *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
- *lower++ = 0xdf;
- *lower++ = '\0';
- *lower++ = '\0';
- *lower = '\0';
- (*pp) += 8;
- return 4;
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -129,20 +123,6 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
int c, v;
- if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- if (end > p + 7 &&
- ((*p == 's' && *(p+4) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+4) == 'S'))) &&
- *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) {
- (*pp) += 4;
- return TRUE;
- }
- else if (*p == 0xdf) {
- return TRUE;
- }
- }
-
if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
ONIGENC_IS_MBC_ASCII(p)) ||
((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 &&
@@ -181,8 +161,7 @@ OnigEncodingType OnigEncodingUTF32_LE = {
4, /* max byte length */
4, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c
index 592bebfe8f..c7481d7050 100644
--- a/ext/mbstring/oniguruma/enc/utf8.c
+++ b/ext/mbstring/oniguruma/enc/utf8.c
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako
+ * Copyright (c) 2002-2006 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,7 @@
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
-static int EncLen_UTF8[] = {
+static const int EncLen_UTF8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p)
return EncLen_UTF8[*p];
}
+static int
+utf8_is_mbc_newline(const UChar* p, const UChar* end)
+{ /* returns 1 if the bytes at p (bounded by end) begin a line terminator, else 0 */
+ if (p < end) { /* need at least one readable byte */
+ if (*p == 0x0a) return 1; /* U+000A */
+
+#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
+ if (*p == 0x0d) return 1; /* U+000D */
+ if (p + 1 < end) { /* a second byte is readable */
+ if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
+ return 1;
+ if (p + 2 < end) { /* a third byte is readable */
+ if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
+ && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
+ return 1;
+ }
+ }
+#endif
+ }
+
+ return 0;
+}
+
static OnigCodePoint
utf8_mbc_to_code(const UChar* p, const UChar* end)
{
@@ -200,17 +223,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if (end > p + 1 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S')))) {
- *lower++ = '\303';
- *lower = '\237';
- (*pp) += 2;
- return 2;
- }
-
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
}
@@ -235,15 +247,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC
return 2;
}
}
-#if 0
- else if (c == (UChar )'\237' &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- *lower++ = '\303';
- *lower = '\237';
- (*pp) += 2;
- return 2;
- }
-#endif
}
}
@@ -265,15 +268,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
const UChar* p = *pp;
if (ONIGENC_IS_MBC_ASCII(p)) {
- if (end > p + 1 &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 &&
- ((*p == 's' && *(p+1) == 's') ||
- ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 &&
- (*p == 'S' && *(p+1) == 'S')))) {
- (*pp) += 2;
- return TRUE;
- }
-
(*pp)++;
if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) {
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
@@ -295,10 +289,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
return TRUE;
}
}
- else if (c == (UChar )'\237' &&
- (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- return TRUE;
- }
}
}
}
@@ -307,16 +297,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end)
}
-static OnigCodePoint EmptyRange[] = { 0 };
+static const OnigCodePoint EmptyRange[] = { 0 };
-static OnigCodePoint SBAlnum[] = {
+static const OnigCodePoint SBAlnum[] = {
3,
0x0030, 0x0039,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlnum[] = {
+static const OnigCodePoint MBAlnum[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
411,
#else
@@ -738,13 +728,13 @@ static OnigCodePoint MBAlnum[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlnum */
-static OnigCodePoint SBAlpha[] = {
+static const OnigCodePoint SBAlpha[] = {
2,
0x0041, 0x005a,
0x0061, 0x007a
};
-static OnigCodePoint MBAlpha[] = {
+static const OnigCodePoint MBAlpha[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
394,
#else
@@ -1149,13 +1139,13 @@ static OnigCodePoint MBAlpha[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBAlpha */
-static OnigCodePoint SBBlank[] = {
+static const OnigCodePoint SBBlank[] = {
2,
0x0009, 0x0009,
0x0020, 0x0020
};
-static OnigCodePoint MBBlank[] = {
+static const OnigCodePoint MBBlank[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
7,
#else
@@ -1173,13 +1163,13 @@ static OnigCodePoint MBBlank[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBBlank */
-static OnigCodePoint SBCntrl[] = {
+static const OnigCodePoint SBCntrl[] = {
2,
0x0000, 0x001f,
0x007f, 0x007f
};
-static OnigCodePoint MBCntrl[] = {
+static const OnigCodePoint MBCntrl[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
18,
#else
@@ -1208,12 +1198,12 @@ static OnigCodePoint MBCntrl[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBCntrl */
-static OnigCodePoint SBDigit[] = {
+static const OnigCodePoint SBDigit[] = {
1,
0x0030, 0x0039
};
-static OnigCodePoint MBDigit[] = {
+static const OnigCodePoint MBDigit[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
22,
#else
@@ -1245,12 +1235,12 @@ static OnigCodePoint MBDigit[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBDigit */
-static OnigCodePoint SBGraph[] = {
+static const OnigCodePoint SBGraph[] = {
1,
0x0021, 0x007e
};
-static OnigCodePoint MBGraph[] = {
+static const OnigCodePoint MBGraph[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
404,
#else
@@ -1665,12 +1655,12 @@ static OnigCodePoint MBGraph[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBGraph */
-static OnigCodePoint SBLower[] = {
+static const OnigCodePoint SBLower[] = {
1,
0x0061, 0x007a
};
-static OnigCodePoint MBLower[] = {
+static const OnigCodePoint MBLower[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
423,
#else
@@ -2104,13 +2094,13 @@ static OnigCodePoint MBLower[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBLower */
-static OnigCodePoint SBPrint[] = {
+static const OnigCodePoint SBPrint[] = {
2,
0x0009, 0x000d,
0x0020, 0x007e
};
-static OnigCodePoint MBPrint[] = {
+static const OnigCodePoint MBPrint[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
403,
#else
@@ -2524,7 +2514,7 @@ static OnigCodePoint MBPrint[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPrint */
-static OnigCodePoint SBPunct[] = {
+static const OnigCodePoint SBPunct[] = {
9,
0x0021, 0x0023,
0x0025, 0x002a,
@@ -2537,7 +2527,7 @@ static OnigCodePoint SBPunct[] = {
0x007d, 0x007d
}; /* end of SBPunct */
-static OnigCodePoint MBPunct[] = {
+static const OnigCodePoint MBPunct[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
77,
#else
@@ -2625,13 +2615,13 @@ static OnigCodePoint MBPunct[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBPunct */
-static OnigCodePoint SBSpace[] = {
+static const OnigCodePoint SBSpace[] = {
2,
0x0009, 0x000d,
0x0020, 0x0020
};
-static OnigCodePoint MBSpace[] = {
+static const OnigCodePoint MBSpace[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
9,
#else
@@ -2651,12 +2641,12 @@ static OnigCodePoint MBSpace[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBSpace */
-static OnigCodePoint SBUpper[] = {
+static const OnigCodePoint SBUpper[] = {
1,
0x0041, 0x005a
};
-static OnigCodePoint MBUpper[] = {
+static const OnigCodePoint MBUpper[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
420,
#else
@@ -3087,19 +3077,19 @@ static OnigCodePoint MBUpper[] = {
#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
}; /* end of MBUpper */
-static OnigCodePoint SBXDigit[] = {
+static const OnigCodePoint SBXDigit[] = {
3,
0x0030, 0x0039,
0x0041, 0x0046,
0x0061, 0x0066
};
-static OnigCodePoint SBASCII[] = {
+static const OnigCodePoint SBASCII[] = {
1,
0x0000, 0x007f
};
-static OnigCodePoint SBWord[] = {
+static const OnigCodePoint SBWord[] = {
4,
0x0030, 0x0039,
0x0041, 0x005a,
@@ -3107,7 +3097,7 @@ static OnigCodePoint SBWord[] = {
0x0061, 0x007a
};
-static OnigCodePoint MBWord[] = {
+static const OnigCodePoint MBWord[] = {
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
432,
#else
@@ -3554,7 +3544,7 @@ static OnigCodePoint MBWord[] = {
static int
utf8_get_ctype_code_range(int ctype,
- OnigCodePoint* sbr[], OnigCodePoint* mbr[])
+ const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])
{
#define CR_SET(sbl,mbl) do { \
*sbr = sbl; \
@@ -3622,7 +3612,7 @@ static int
utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
#ifdef USE_UNICODE_FULL_RANGE_CTYPE
- OnigCodePoint *range;
+ const OnigCodePoint *range;
#endif
if (code < 256) {
@@ -3674,6 +3664,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
case ONIGENC_CTYPE_ALNUM:
range = MBAlnum;
break;
+ case ONIGENC_CTYPE_NEWLINE:
+ return FALSE;
+ break;
default:
return ONIGENCERR_TYPE_BUG;
@@ -3713,8 +3706,7 @@ OnigEncodingType OnigEncodingUTF8 = {
6, /* max byte length */
1, /* min byte length */
(ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE |
- ONIGENC_AMBIGUOUS_MATCH_COMPOUND),
+ ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ),
{
(OnigCodePoint )'\\' /* esc */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
@@ -3723,7 +3715,7 @@ OnigEncodingType OnigEncodingUTF8 = {
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
, (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
},
- onigenc_is_mbc_newline_0x0a,
+ utf8_is_mbc_newline,
utf8_mbc_to_code,
utf8_code_to_mbclen,
utf8_code_to_mbc,
diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html
index 02e844c363..d55f1cc94f 100755
--- a/ext/mbstring/oniguruma/index.html
+++ b/ext/mbstring/oniguruma/index.html
@@ -5,58 +5,50 @@
-
-M
-N
-
-Oniguruma
+Oniguruma
(Japanese)
-2005/03/07 (C) K.Kosako
+(c) K.Kosako, updated at: 2007/08/16
+
+
+- What's new
+
+
+- 2007/08/16: Version 4.7.1 released.
+- 2007/07/14: Version 5.9.0 released.
+- 2007/06/20: Version 2.5.9 released.
+- 2007/06/20: Maintainer of 2.x was changed.
+
+
+
+
Oniguruma is a regular expressions library.
The characteristics of this library is that different character encoding
for every regular expression object can be specified.
+
(supported APIs: GNU regex, POSIX and Oniguruma native)
- Supported character encodings:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
-Shift_JIS, Big5, KOI8-R, KOI8,
+Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
-ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-
-
-
-
-
+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-- What's new
+(GB18030 encoding was contributed by KUBO Takehiro)
+(CP1251 encoding was contributed by Byte)
-
-- released Version 3.7.1 (2005/03/07)
-
- released Version 2.4.2 (2005/03/05)
-
+
-
-- There are two ways of using of it in this program.
-
-- (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native)
-
- (2) Built-in regular expressions engine of Ruby 1.6/1.8/1.9
- In Ruby 1.9, Oniguruma is already integrated by Kazuo Saito.
-
-
+License: BSD license.
- Platform:
@@ -67,31 +59,27 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-- License:
-When this software is partly used or it is distributed with Ruby,
-this of Ruby follows the license of Ruby.
-It follows the BSD license in the case of the one except for it.
-
- Download:
-* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.
-* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8.
+Maintainer of 2.x was changed to Hannes Wyss <hwyss AT ywesee.com>.
+About 2.x, please contact him.
+* 5.x supports Unicode Property/Script.
+* 2.x supports Ruby1.6/1.8.
-
-- Documents: (version 3.7.1)
+
- Documents: (version 5.9.0)
-- Links:
+
- Site Links:
+
+
+- Links:
+
- References:
-
and I'm thankful to Akinori MUSHA.
-
+
+- Other Libraries:
+
+
+
+Back to Home