From a0f3cf5cc4a42850b940da33a0b912268286bfd3 Mon Sep 17 00:00:00 2001 From: Jani Taskinen Date: Mon, 20 Apr 2009 17:06:03 +0000 Subject: [PATCH] MFB: Thanks to the "maintainers" who are too lazy to commit FIRST to HEAD! --- ext/mbstring/config.m4 | 13 +- ext/mbstring/config.w32 | 14 +- ext/mbstring/libmbfl/AUTHORS | 15 +- ext/mbstring/libmbfl/Makefile.am | 6 +- ext/mbstring/libmbfl/configure.in | 15 +- ext/mbstring/libmbfl/filters/Makefile.am | 134 +- ext/mbstring/libmbfl/filters/Makefile.bcc32 | 52 +- .../libmbfl/filters/mbfilter_cp1254.c | 157 +++ .../libmbfl/filters/mbfilter_cp1254.h | 43 + ext/mbstring/libmbfl/filters/mbfilter_cp850.c | 147 +++ ext/mbstring/libmbfl/filters/mbfilter_cp850.h | 37 + .../libmbfl/filters/mbfilter_euc_jp_win.c | 11 + .../libmbfl/filters/mbfilter_iso2022_jp_ms.c | 522 ++++++++ .../libmbfl/filters/mbfilter_iso2022_jp_ms.h | 44 + ext/mbstring/libmbfl/filters/mbfilter_jis.c | 154 +-- ext/mbstring/libmbfl/filters/mbfilter_jis.h | 4 - ext/mbstring/libmbfl/filters/mbfilter_koi8u.c | 146 +++ ext/mbstring/libmbfl/filters/mbfilter_koi8u.h | 47 + .../libmbfl/filters/unicode_table_cp1254.h | 51 + .../libmbfl/filters/unicode_table_cp850.h | 52 + .../libmbfl/filters/unicode_table_jis.h | 14 +- .../libmbfl/filters/unicode_table_koi8u.h | 166 +++ ext/mbstring/libmbfl/libmbfl.dsp | 12 + ext/mbstring/libmbfl/libmbfl.sln | 26 +- ext/mbstring/libmbfl/libmbfl.vcproj | 777 ++++++++---- ext/mbstring/libmbfl/mbfl.rc | 8 +- ext/mbstring/libmbfl/mbfl/Makefile.am | 29 +- ext/mbstring/libmbfl/mbfl/Makefile.bcc32 | 13 +- ext/mbstring/libmbfl/mbfl/mbfilter.h | 7 + ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h | 2 +- ext/mbstring/libmbfl/mbfl/mbfilter_pass.h | 4 +- ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h | 2 +- ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 7 + ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 25 +- ext/mbstring/libmbfl/mbfl/mbfl_encoding.h | 2 + ext/mbstring/libmbfl/mbfl/mbfl_ident.c | 7 + ext/mbstring/libmbfl/mbfl/mbfl_language.c | 2 + 
ext/mbstring/libmbfl/mbfl/mbfl_language.h | 1 + ext/mbstring/libmbfl/nls/Makefile.am | 23 +- ext/mbstring/libmbfl/nls/Makefile.bcc32 | 12 +- ext/mbstring/libmbfl/nls/nls_ua.c | 22 + ext/mbstring/libmbfl/nls/nls_ua.h | 9 + ext/mbstring/libmbfl/tests/Makefile.am | 10 + ext/mbstring/libmbfl/tests/conv_encoding.c | 104 ++ .../tests/conv_encoding.tests/Makefile.am | 1 + .../cp51932_cp50220raw.exp | 33 + .../tests/conv_encoding.tests/ujis_sjis.exp | 35 + .../tests/conv_encoding.tests/utf8_sjis.exp | 35 + ext/mbstring/libmbfl/tests/conv_kana.c | 147 +++ ext/mbstring/libmbfl/tests/strcut.c | 113 ++ .../libmbfl/tests/strcut.tests/Makefile.am | 1 + .../libmbfl/tests/strcut.tests/iso2022jp.exp | 129 ++ .../libmbfl/tests/strcut.tests/ujis.exp | 91 ++ .../libmbfl/tests/strcut.tests/utf8.exp | 91 ++ .../libmbfl/tests/strwidth.tests/Makefile.am | 1 + .../tests/strwidth.tests/conv_encoding.c | 104 ++ .../conv_encoding.tests/Makefile.am | 1 + .../cp51932_cp50220raw.exp | 33 + .../conv_encoding.tests/ujis_sjis.exp | 35 + .../conv_encoding.tests/utf8_sjis.exp | 35 + .../libmbfl/tests/strwidth.tests/conv_kana.c | 147 +++ .../conv_kana.tests/Makefile.am | 1 + .../conv_kana.tests/conv_kana.exp | 1098 +++++++++++++++++ .../libmbfl/tests/strwidth.tests/strwidth.exp | 47 + ext/mbstring/mbstring.c | 199 ++- ext/mbstring/mbstring.h | 10 - ext/mbstring/oniguruma/COPYING | 4 +- ext/mbstring/oniguruma/HISTORY | 487 +++++++- ext/mbstring/oniguruma/README | 64 +- ext/mbstring/oniguruma/README.ja | 67 +- ext/mbstring/oniguruma/config.h.in | 125 +- ext/mbstring/oniguruma/enc/big5.c | 2 +- ext/mbstring/oniguruma/enc/euc_jp.c | 20 +- ext/mbstring/oniguruma/enc/euc_kr.c | 2 +- ext/mbstring/oniguruma/enc/euc_tw.c | 2 +- ext/mbstring/oniguruma/enc/iso8859_1.c | 45 +- ext/mbstring/oniguruma/enc/iso8859_10.c | 51 +- ext/mbstring/oniguruma/enc/iso8859_11.c | 14 +- ext/mbstring/oniguruma/enc/iso8859_13.c | 191 ++- ext/mbstring/oniguruma/enc/iso8859_14.c | 245 ++-- ext/mbstring/oniguruma/enc/iso8859_15.c | 
70 +- ext/mbstring/oniguruma/enc/iso8859_16.c | 233 ++-- ext/mbstring/oniguruma/enc/iso8859_2.c | 51 +- ext/mbstring/oniguruma/enc/iso8859_3.c | 51 +- ext/mbstring/oniguruma/enc/iso8859_4.c | 51 +- ext/mbstring/oniguruma/enc/iso8859_5.c | 20 +- ext/mbstring/oniguruma/enc/iso8859_6.c | 14 +- ext/mbstring/oniguruma/enc/iso8859_7.c | 20 +- ext/mbstring/oniguruma/enc/iso8859_8.c | 14 +- ext/mbstring/oniguruma/enc/iso8859_9.c | 51 +- ext/mbstring/oniguruma/enc/koi8.c | 28 +- ext/mbstring/oniguruma/enc/koi8_r.c | 25 +- ext/mbstring/oniguruma/enc/mktable.c | 16 +- ext/mbstring/oniguruma/enc/sjis.c | 21 +- ext/mbstring/oniguruma/enc/unicode.c | 37 +- ext/mbstring/oniguruma/enc/utf16_be.c | 39 +- ext/mbstring/oniguruma/enc/utf16_le.c | 36 +- ext/mbstring/oniguruma/enc/utf32_be.c | 41 +- ext/mbstring/oniguruma/enc/utf32_le.c | 41 +- ext/mbstring/oniguruma/enc/utf8.c | 126 +- ext/mbstring/oniguruma/index.html | 181 +-- ext/mbstring/oniguruma/onigcmpt200.h | 6 + ext/mbstring/oniguruma/oniggnu.h | 8 +- ext/mbstring/oniguruma/oniguruma.h | 230 ++-- ext/mbstring/oniguruma/regcomp.c | 1082 +++++++++++----- ext/mbstring/oniguruma/regenc.c | 72 +- ext/mbstring/oniguruma/regenc.h | 26 +- ext/mbstring/oniguruma/regerror.c | 95 +- ext/mbstring/oniguruma/regexec.c | 727 ++++++++--- ext/mbstring/oniguruma/regext.c | 4 +- ext/mbstring/oniguruma/reggnu.c | 12 +- ext/mbstring/oniguruma/regint.h | 129 +- ext/mbstring/oniguruma/regparse.c | 690 +++++++---- ext/mbstring/oniguruma/regparse.h | 48 +- ext/mbstring/oniguruma/regposix.c | 11 +- ext/mbstring/oniguruma/regsyntax.c | 33 +- ext/mbstring/oniguruma/regversion.c | 4 +- ext/mbstring/oniguruma/st.c | 140 +-- ext/mbstring/oniguruma/st.h | 16 +- ext/mbstring/php_mbregex.c | 7 + ext/mbstring/php_unicode.c | 6 +- ext/mbstring/tests/bug43994.phpt | 104 +- ext/mbstring/tests/bug43998.phpt | 30 +- ext/mbstring/tests/mb_strstr.phpt | 35 + 124 files changed, 8427 insertions(+), 2908 deletions(-) create mode 100644 
ext/mbstring/libmbfl/filters/mbfilter_cp1254.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp1254.h create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp850.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp850.h create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_koi8u.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_koi8u.h create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_cp1254.h create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_cp850.h create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_koi8u.h create mode 100644 ext/mbstring/libmbfl/nls/nls_ua.c create mode 100644 ext/mbstring/libmbfl/nls/nls_ua.h create mode 100644 ext/mbstring/libmbfl/tests/Makefile.am create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.c create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp create mode 100644 ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp create mode 100644 ext/mbstring/libmbfl/tests/conv_kana.c create mode 100644 ext/mbstring/libmbfl/tests/strcut.c create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp create mode 100644 ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am create mode 100644 
ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp create mode 100644 ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp create mode 100644 ext/mbstring/tests/mb_strstr.phpt diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index e61ba3c1b2..a5e7920cdf 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -182,9 +182,9 @@ int main() { return foo(10, "", 3.14); } PHP_EVAL_LIBLINE([$MBSTRING_SHARED_LIBADD], LDFLAGS) AC_MSG_CHECKING([if oniguruma has an invalid entry for KOI8 encoding]) AC_TRY_LINK([ - #include +#include ], [ - return (int)(ONIG_ENCODING_KOI8 + 1); +return (int)(ONIG_ENCODING_KOI8 + 1); ], [ AC_MSG_RESULT([no]) ], [ @@ -228,6 +228,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_byte4.c libmbfl/filters/mbfilter_cp1251.c libmbfl/filters/mbfilter_cp1252.c + libmbfl/filters/mbfilter_cp1254.c libmbfl/filters/mbfilter_cp866.c libmbfl/filters/mbfilter_cp932.c libmbfl/filters/mbfilter_cp936.c @@ -255,6 +256,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_iso8859_8.c libmbfl/filters/mbfilter_iso8859_9.c libmbfl/filters/mbfilter_jis.c + libmbfl/filters/mbfilter_iso2022_jp_ms.c libmbfl/filters/mbfilter_koi8r.c libmbfl/filters/mbfilter_armscii8.c libmbfl/filters/mbfilter_qprint.c @@ -268,6 +270,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_utf7imap.c libmbfl/filters/mbfilter_utf8.c libmbfl/filters/mbfilter_uuencode.c + libmbfl/filters/mbfilter_koi8u.c libmbfl/filters/mbfilter_cp850.c 
libmbfl/mbfl/mbfilter.c libmbfl/mbfl/mbfilter_8bit.c @@ -291,9 +294,9 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/nls/nls_zh.c libmbfl/nls/nls_hy.c libmbfl/nls/nls_tr.c + libmbfl/nls/nls_ua.c ]) PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H]) - PHP_MBSTRING_ADD_INSTALL_HEADERS([libmbfl/config.h libmbfl/mbfl/eaw_table.h libmbfl/mbfl/mbfilter.h libmbfl/mbfl/mbfilter_8bit.h libmbfl/mbfl/mbfilter_pass.h libmbfl/mbfl/mbfilter_wchar.h libmbfl/mbfl/mbfl_allocators.h libmbfl/mbfl/mbfl_consts.h libmbfl/mbfl/mbfl_convert.h libmbfl/mbfl/mbfl_defs.h libmbfl/mbfl/mbfl_encoding.h libmbfl/mbfl/mbfl_filter_output.h libmbfl/mbfl/mbfl_ident.h libmbfl/mbfl/mbfl_language.h libmbfl/mbfl/mbfl_memory_device.h libmbfl/mbfl/mbfl_string.h]) else dnl @@ -337,8 +340,8 @@ PHP_ARG_ENABLE([mbregex_backtrack], [whether to check multibyte regex backtrack] MBSTRING: Disable multibyte regex backtrack check], yes, no) PHP_ARG_WITH(libmbfl, [for external libmbfl], -[ --with-libmbfl[=DIR] MBSTRING: Use external libmbfl. DIR is the libmbfl install prefix. - If DIR is not set, the bundled libmbfl will be used], no, no) +[ --with-libmbfl[=DIR] MBSTRING: Use external libmbfl. DIR is the libmbfl base + install directory [BUNDLED]], no, no) PHP_ARG_WITH(onig, [for external oniguruma], [ --with-onig[=DIR] MBSTRING: Use external oniguruma. DIR is the oniguruma install prefix. 
diff --git a/ext/mbstring/config.w32 b/ext/mbstring/config.w32 index 87f68dbd3b..f452c1868a 100644 --- a/ext/mbstring/config.w32 +++ b/ext/mbstring/config.w32 @@ -3,6 +3,7 @@ ARG_ENABLE("mbstring", "multibyte string functions", "no"); ARG_ENABLE("mbregex", "multibyte regex support", "no"); +ARG_ENABLE("mbregex-backtrack", "check multibyte regex backtrack", "yes"); if (PHP_MBSTRING == "yes") { @@ -11,7 +12,7 @@ if (PHP_MBSTRING == "yes") { FSO.CopyFile("ext\\mbstring\\oniguruma\\win32\\config.h", "ext\\mbstring\\oniguruma\\config.h", true); - EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", null, + EXTENSION("mbstring", "mbstring.c php_unicode.c mb_gpc.c", true, "-Iext/mbstring/libmbfl -Iext/mbstring/libmbfl/mbfl \ -Iext/mbstring/oniguruma /D NOT_RUBY=1 /D LIBMBFL_EXPORTS=1 \ /D HAVE_STDARG_PROTOTYPES=1 /D HAVE_CONFIG_H /D HAVE_STDLIB_H \ @@ -28,10 +29,12 @@ if (PHP_MBSTRING == "yes") { mbfilter_iso8859_2.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c \ mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c \ mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_jis.c \ + mbfilter_iso2022_jp_ms.c \ mbfilter_koi8r.c mbfilter_qprint.c mbfilter_sjis.c mbfilter_ucs2.c \ mbfilter_ucs4.c mbfilter_uhc.c mbfilter_utf16.c mbfilter_utf32.c \ mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_utf8.c \ - mbfilter_uuencode.c mbfilter_armscii8.c", "mbstring"); + mbfilter_koi8u.c mbfilter_cp1254.c \ + mbfilter_uuencode.c mbfilter_armscii8.c mbfilter_cp850.c ", "mbstring"); ADD_SOURCES("ext/mbstring/libmbfl/mbfl", "mbfilter.c mbfilter_8bit.c \ mbfilter_pass.c mbfilter_wchar.c mbfl_convert.c mbfl_encoding.c \ @@ -40,7 +43,7 @@ if (PHP_MBSTRING == "yes") { ADD_SOURCES("ext/mbstring/libmbfl/nls", "nls_de.c nls_en.c nls_ja.c \ nls_kr.c nls_neutral.c nls_ru.c nls_uni.c nls_zh.c nls_hy.c \ - nls_tr.c", "mbstring"); + nls_ua.c nls_tr.c", "mbstring"); AC_DEFINE('HAVE_MBSTRING', 1, 'Have mbstring support'); AC_DEFINE('HAVE_MBSTR_CN', 1, 'CN'); @@ -53,6 +56,9 @@ if (PHP_MBSTRING == 
"yes") { AC_DEFINE('HAVE_STDARG_PROTOTYPES', 1, 'have stdarg.h'); AC_DEFINE('HAVE_MBREGEX', 1); AC_DEFINE('HAVE_ONIG', 1); + if (PHP_MBREGEX_BACKTRACK != "no") { + AC_DEFINE('USE_COMBINATION_EXPLOSION_CHECK', 1); + } ADD_SOURCES("ext/mbstring/oniguruma", "regcomp.c regerror.c \ regenc.c regexec.c reggnu.c regparse.c regposerr.c \ regext.c regsyntax.c regtrav.c regversion.c st.c", "mbstring"); @@ -62,7 +68,7 @@ if (PHP_MBSTRING == "yes") { iso8859_7.c iso8859_8.c iso8859_9.c iso8859_10.c \ iso8859_11.c iso8859_13.c iso8859_14.c iso8859_15.c iso8859_16.c \ koi8.c koi8_r.c sjis.c utf8.c unicode.c utf16_be.c utf16_le.c \ - utf32_be.c utf32_le.c", "mbstring"); + utf32_be.c utf32_le.c gb18030.c", "mbstring"); ADD_SOURCES("ext/mbstring", "php_mbregex.c", "mbstring"); } } diff --git a/ext/mbstring/libmbfl/AUTHORS b/ext/mbstring/libmbfl/AUTHORS index e606231518..9a9f2f9fa3 100644 --- a/ext/mbstring/libmbfl/AUTHORS +++ b/ext/mbstring/libmbfl/AUTHORS @@ -1,10 +1,13 @@ -Den V. Tsopa -Hironori Sato Marcus Boerger -Moriyoshi Koizumi +Hayk Chamyan +Wez Furlong Rui Hirokawa Shigeru Kanemoto -Tsukada Takuya -Tateyama U. Kenkichi -Wez Furlong +Moriyoshi Koizumi +Hironori Sato +Tsukada Takuya +Tateyama +Den V. 
Tsopa +Maksym Veremeyenko +Haluk AKIN diff --git a/ext/mbstring/libmbfl/Makefile.am b/ext/mbstring/libmbfl/Makefile.am index 7f60683435..070a7fcc62 100644 --- a/ext/mbstring/libmbfl/Makefile.am +++ b/ext/mbstring/libmbfl/Makefile.am @@ -1,5 +1,9 @@ +AUTOMAKE_OPTIONS=dejagnu +DEJATOOL=conv_encoding conv_kana strwidth strcut +RUNTESTDEFAULTFLAGS=--tool $$tool --srcdir "$$srcdir"/tests +LANG=C EXTRA_DIST=AUTHORS DISCLAIMER LICENSE Makefile.bcc32 \ config.h.bcc32 config.h.vc6 \ libmbfl.dsp libmbfl.dsw libmbfl.sln libmbfl.vcproj mbfl.rc \ mksbcc32.bat rules.mak.bcc32 -SUBDIRS = nls filters mbfl +SUBDIRS = nls filters mbfl tests diff --git a/ext/mbstring/libmbfl/configure.in b/ext/mbstring/libmbfl/configure.in index 25d2e59373..47e1026c7d 100644 --- a/ext/mbstring/libmbfl/configure.in +++ b/ext/mbstring/libmbfl/configure.in @@ -1,10 +1,10 @@ # Process this file with autoconf to produce a configure script. AC_INIT(mbfl/mbfilter.c) -AM_INIT_AUTOMAKE(libmbfl, 1.0.0) +AM_INIT_AUTOMAKE(libmbfl, 1.0.2) AC_CONFIG_SRCDIR(mbfl/mbfilter.c) AM_CONFIG_HEADER(config.h) -SHLIB_VERSION="1:0:0" +SHLIB_VERSION="1:0:2" AC_SUBST(SHLIB_VERSION) # Checks for programs. 
@@ -34,5 +34,14 @@ if test "$FETCH_VIA_FTP" = "curl"; then FETCH_VIA_FTP="curl -O" fi -AC_CONFIG_FILES([Makefile mbfl/Makefile filters/Makefile nls/Makefile]) +AC_CONFIG_FILES([ + Makefile + mbfl/Makefile + filters/Makefile + nls/Makefile + tests/Makefile + tests/conv_encoding.tests/Makefile + tests/conv_kana.tests/Makefile + tests/strwidth.tests/Makefile + tests/strcut.tests/Makefile]) AC_OUTPUT diff --git a/ext/mbstring/libmbfl/filters/Makefile.am b/ext/mbstring/libmbfl/filters/Makefile.am index 9b2fda4c39..802af4e61d 100644 --- a/ext/mbstring/libmbfl/filters/Makefile.am +++ b/ext/mbstring/libmbfl/filters/Makefile.am @@ -2,7 +2,139 @@ EXTRA_DIST=Makefile.bcc32 mk_sb_tbl.awk noinst_LTLIBRARIES=libmbfl_filters.la INCLUDES=-I../mbfl libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_cp51932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp51932.h mbfilter_cp936.h mbfilter_euc_cn.h 
mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h +libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ + mbfilter_hz.c \ + mbfilter_euc_tw.c \ + mbfilter_big5.c \ + mbfilter_euc_jp.c \ + mbfilter_jis.c \ + mbfilter_iso8859_1.c \ + mbfilter_iso8859_2.c \ + mbfilter_cp1254.c \ + mbfilter_cp1252.c \ + mbfilter_cp1251.c \ + mbfilter_ascii.c \ + mbfilter_iso8859_3.c \ + mbfilter_iso8859_4.c \ + mbfilter_iso8859_5.c \ + mbfilter_iso8859_6.c \ + mbfilter_iso8859_7.c \ + mbfilter_iso8859_8.c \ + mbfilter_iso8859_9.c \ + mbfilter_iso8859_10.c \ + mbfilter_iso8859_13.c \ + mbfilter_iso8859_14.c \ + mbfilter_iso8859_15.c \ + mbfilter_iso8859_16.c \ + mbfilter_htmlent.c \ + mbfilter_byte2.c \ + mbfilter_byte4.c \ + mbfilter_uuencode.c \ + mbfilter_base64.c \ + mbfilter_sjis.c \ + 
mbfilter_7bit.c \ + mbfilter_qprint.c \ + mbfilter_ucs4.c \ + mbfilter_ucs2.c \ + mbfilter_utf32.c \ + mbfilter_utf16.c \ + mbfilter_utf8.c \ + mbfilter_utf7.c \ + mbfilter_utf7imap.c \ + mbfilter_euc_jp_win.c \ + mbfilter_cp932.c \ + mbfilter_cp51932.c \ + mbfilter_euc_cn.c \ + mbfilter_euc_kr.c \ + mbfilter_uhc.c \ + mbfilter_iso2022_kr.c \ + mbfilter_cp866.c \ + mbfilter_koi8r.c \ + mbfilter_koi8u.c \ + mbfilter_armscii8.c \ + mbfilter_cp850.c \ + html_entities.c \ + cp932_table.h \ + html_entities.h \ + mbfilter_7bit.h \ + mbfilter_ascii.h \ + mbfilter_base64.h \ + mbfilter_big5.h \ + mbfilter_byte2.h \ + mbfilter_byte4.h \ + mbfilter_cp1251.h \ + mbfilter_cp1252.h \ + mbfilter_cp1254.h \ + mbfilter_cp866.h \ + mbfilter_cp932.h \ + mbfilter_cp936.h \ + mbfilter_euc_cn.h \ + mbfilter_euc_jp.h \ + mbfilter_euc_jp_win.h \ + mbfilter_euc_kr.h \ + mbfilter_euc_tw.h \ + mbfilter_htmlent.h \ + mbfilter_hz.h \ + mbfilter_iso2022_kr.h \ + mbfilter_iso8859_1.h \ + mbfilter_iso8859_10.h \ + mbfilter_iso8859_13.h \ + mbfilter_iso8859_14.h \ + mbfilter_iso8859_15.h \ + mbfilter_iso8859_16.h \ + mbfilter_iso8859_2.h \ + mbfilter_iso8859_3.h \ + mbfilter_iso8859_4.h \ + mbfilter_iso8859_5.h \ + mbfilter_iso8859_6.h \ + mbfilter_iso8859_7.h \ + mbfilter_iso8859_8.h \ + mbfilter_iso8859_9.h \ + mbfilter_jis.h \ + mbfilter_koi8r.h \ + mbfilter_koi8u.h \ + mbfilter_armscii8.h \ + mbfilter_qprint.h \ + mbfilter_sjis.h \ + mbfilter_ucs2.h \ + mbfilter_ucs4.h \ + mbfilter_uhc.h \ + mbfilter_utf16.h \ + mbfilter_utf32.h \ + mbfilter_utf7.h \ + mbfilter_utf7imap.h \ + mbfilter_utf8.h \ + mbfilter_uuencode.h \ + mbfilter_cp51932.h \ + mbfilter_cp850.h \ + unicode_prop.h \ + unicode_table_big5.h \ + unicode_table_cns11643.h \ + unicode_table_cp1251.h \ + unicode_table_cp1252.h \ + unicode_table_cp1254.h \ + unicode_table_cp866.h \ + unicode_table_cp932_ext.h \ + unicode_table_cp936.h \ + unicode_table_iso8859_10.h \ + unicode_table_iso8859_13.h \ + unicode_table_iso8859_14.h \ + 
unicode_table_iso8859_15.h \ + unicode_table_iso8859_16.h \ + unicode_table_iso8859_2.h \ + unicode_table_iso8859_3.h \ + unicode_table_iso8859_4.h \ + unicode_table_iso8859_5.h \ + unicode_table_iso8859_6.h \ + unicode_table_iso8859_7.h \ + unicode_table_iso8859_8.h \ + unicode_table_iso8859_9.h \ + unicode_table_jis.h \ + unicode_table_koi8r.h \ + unicode_table_koi8u.h \ + unicode_table_armscii8.h \ + unicode_table_cp850.h \ + unicode_table_uhc.h mbfilter_iso8859_2.c: unicode_table_iso8859_2.h diff --git a/ext/mbstring/libmbfl/filters/Makefile.bcc32 b/ext/mbstring/libmbfl/filters/Makefile.bcc32 index 03e1d88024..841c09632a 100644 --- a/ext/mbstring/libmbfl/filters/Makefile.bcc32 +++ b/ext/mbstring/libmbfl/filters/Makefile.bcc32 @@ -1,6 +1,56 @@ !include ..\rules.mak.bcc32 INCLUDES=$(INCLUDES) -I../mbfl -OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj mbfilter_armscii8.obj +OBJS=mbfilter_cp936.obj \ + mbfilter_hz.obj \ + mbfilter_euc_tw.obj \ + mbfilter_big5.obj \ + mbfilter_euc_jp.obj \ + mbfilter_jis.obj \ + mbfilter_iso8859_1.obj \ + mbfilter_iso8859_2.obj 
\ + mbfilter_cp1252.obj \ + mbfilter_cp1251.obj \ + mbfilter_cp1254.obj \ + mbfilter_ascii.obj \ + mbfilter_iso8859_3.obj \ + mbfilter_iso8859_4.obj \ + mbfilter_iso8859_5.obj \ + mbfilter_iso8859_6.obj \ + mbfilter_iso8859_7.obj \ + mbfilter_iso8859_8.obj \ + mbfilter_iso8859_9.obj \ + mbfilter_iso8859_10.obj \ + mbfilter_iso8859_13.obj \ + mbfilter_iso8859_14.obj \ + mbfilter_iso8859_15.obj \ + mbfilter_iso8859_16.obj \ + mbfilter_htmlent.obj \ + mbfilter_byte2.obj \ + mbfilter_byte4.obj \ + mbfilter_uuencode.obj \ + mbfilter_base64.obj \ + mbfilter_sjis.obj \ + mbfilter_7bit.obj \ + mbfilter_qprint.obj \ + mbfilter_ucs4.obj \ + mbfilter_ucs2.obj \ + mbfilter_utf32.obj \ + mbfilter_utf16.obj \ + mbfilter_utf8.obj \ + mbfilter_utf7.obj \ + mbfilter_utf7imap.obj \ + mbfilter_euc_jp_win.obj \ + mbfilter_cp932.obj \ + mbfilter_euc_cn.obj \ + mbfilter_euc_kr.obj \ + mbfilter_uhc.obj \ + mbfilter_iso2022_kr.obj \ + mbfilter_cp866.obj \ + mbfilter_koi8r.obj \ + mbfilter_koi8u.obj \ + html_entities.obj \ + mbfilter_armscii8.obj \ + mbfilter_cp850.obj all: $(OBJS) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c new file mode 100644 index 0000000000..7e933425f4 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.c @@ -0,0 +1,157 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Haluk AKIN + * + */ +/* + * The source code included in this files was separated from mbfilter_ru.c + * by moriyoshi koizumi on 4 dec 2002. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp1254.h" +#include "unicode_table_cp1254.h" + +static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_cp1254_aliases[] = {"CP1254", "CP-1254", "WINDOWS-1254", NULL}; + +const mbfl_encoding mbfl_encoding_cp1254 = { + mbfl_no_encoding_cp1254, + "Windows-1254", + "Windows-1254", + (const char *(*)[])&mbfl_encoding_cp1254_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp1254 = { + mbfl_no_encoding_cp1254, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp1254 +}; + +const struct mbfl_convert_vtbl vtbl_cp1254_wchar = { + mbfl_no_encoding_cp1254, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp1254_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp1254 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp1254, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp1254, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * wchar => cp1254 + */ +int +mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = cp1254_ucs_table_len-1; + while (n >= 0) { + if (c == cp1254_ucs_table[n] && c != 0xfffe) { + s = cp1254_ucs_table_min + n; + break; + } + n--; + } + if 
(s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1254) { + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +/* + * cp1254 => wchar + */ +int +mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < cp1254_ucs_table_min) { + s = c; + } else if (c >= cp1254_ucs_table_min && c < 0x100) { + s = cp1254_ucs_table[c - cp1254_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_CP1254; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* We only distinguish the MS extensions to ISO-8859-1. + * Actually, this is pretty much a NO-OP, since the identification + * system doesn't allow us to discriminate between a positive match, + * a possible match and a definite non-match. + * The problem here is that cp1254 looks like SJIS for certain chars. + * */ +static int mbfl_filt_ident_cp1254(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h new file mode 100644 index 0000000000..3200d520eb --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1254.h @@ -0,0 +1,43 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. 
+ * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Haluk AKIN + * + */ +/* + * the source code included in this files was separated from mbfilter.c + * by moriyoshi koizumi on 4 dec 2002. + * + */ + +#ifndef MBFL_MBFILTER_CP1254_H +#define MBFL_MBFILTER_CP1254_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_cp1254; +extern const struct mbfl_identify_vtbl vtbl_identify_cp1254; +extern const struct mbfl_convert_vtbl vtbl_cp1254_wchar; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp1254; + +int mbfl_filt_conv_wchar_cp1254(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_cp1254_wchar(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP1254_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.c b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c new file mode 100644 index 0000000000..5388c048b1 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c @@ -0,0 +1,147 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa + * Adaption for CP850: D. Giffeler + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" /* included ahead of mbfilter_cp850.h, which uses mbfl_* types but does not include this header itself */ +#include "mbfilter_cp850.h" +#include "unicode_table_cp850.h" /* presumably provides cp850_ucs_table, cp850_ucs_table_min and cp850_ucs_table_len used below */ + +static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM-850", NULL}; /* NULL-terminated list of alias names */ + +const mbfl_encoding mbfl_encoding_cp850 = { /* CP850 encoding descriptor */ + mbfl_no_encoding_cp850, + "CP850", + "CP850", + (const char *(*)[])&mbfl_encoding_cp850_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp850 = { /* identify vtable: shared ctor/dtor plus the CP850 checker defined at the end of this file */ + mbfl_no_encoding_cp850, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp850 +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp850 = { /* wchar -> cp850 conversion vtable: shared ctor/dtor/flush plus the converter below */ + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp850, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp850, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp850_wchar = { /* cp850 -> wchar conversion vtable: shared ctor/dtor/flush plus the converter below */ + mbfl_no_encoding_cp850, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp850_wchar, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* makes the enclosing function return -1 when the wrapped call yields a negative value */ + +/* + * cp850 => wchar: values below cp850_ucs_table_min pass through unchanged, values up to 0xff are looked up in cp850_ucs_table, anything else is tagged MBFL_WCSGROUP_THROUGH + */ +int +mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < cp850_ucs_table_min) { + s = c; + } else if (c >= cp850_ucs_table_min && c < 0x100) { + s = cp850_ucs_table[c - cp850_ucs_table_min]; + if (s <= 0) { /* no usable table entry: emit the input value tagged with the CP850 plane instead */ + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_CP850; + } + } else { + s = c; + s &= 
MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + CK((*filter->output_function)(s, filter->data)); /* hand the result to the next stage; a negative return aborts with -1 */ + + return c; +} + +/* + * wchar => cp850: values below 0x80 pass through; others are found by a linear scan of cp850_ucs_table; unmapped input goes to mbfl_filt_conv_illegal_output unless illegal_mode is NONE + */ +int +mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = cp850_ucs_table_len-1; /* scan from the last table entry down to index 0 */ + while (n >= 0) { + if (c == cp850_ucs_table[n]) { + s = cp850_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP850) { /* not in the table but tagged with the CP850 plane: recover the value under the plane mask */ + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { /* in NONE mode the unmappable character is dropped silently */ + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter) /* clears filter->flag for 0x80 <= c < 0xff and sets it for every other value; always returns c */ +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it -- NOTE(review): this branch also covers every value below 0x80 and 0xff itself; confirm that is intended */ + return c; +} + + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.h b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h new file mode 100644 index 0000000000..a1e0f9c0c2 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h @@ -0,0 +1,37 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa + * Adaption for CP850: D. Giffeler + * + */ + +#ifndef MBFL_MBFILTER_CP850_H +#define MBFL_MBFILTER_CP850_H /* NOTE(review): unlike mbfilter_cp1254.h and mbfilter_iso2022_jp_ms.h in this patch, this header does not include mbfilter.h, so includers must bring in the mbfl_* types first */ + +extern const mbfl_encoding mbfl_encoding_cp850; /* CP850 encoding descriptor */ +extern const struct mbfl_identify_vtbl vtbl_identify_cp850; /* identify-filter vtable for CP850 */ +extern const struct mbfl_convert_vtbl vtbl_wchar_cp850; /* convert-filter vtable, wchar -> cp850 */ +extern const struct mbfl_convert_vtbl vtbl_cp850_wchar; /* convert-filter vtable, cp850 -> wchar */ + +int mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter); /* per-character callback, cp850 => wchar */ +int mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter); /* per-character callback, wchar => cp850 */ + +#endif /* MBFL_MBFILTER_CP850_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c index bc07f13438..ece0c7ee41 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_euc_jp_win.c @@ -204,6 +204,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) s = (c1 - 0xa1)*94 + c - 0xa1; if (s >= 0 && s < jisx0212_ucs_table_size) { w = jisx0212_ucs_table[s]; + if (w == 0x007e) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } } else if (s >= (82*94) && s < (84*94)) { /* vender ext3 (83ku - 84ku) <-> CP932 (115ku -120ku) */ s = (c1<< 8) | c; w = 0; @@ -222,6 +225,9 @@ mbfl_filt_conv_eucjpwin_wchar(int c, mbfl_convert_filter *filter) } else { w = 0; } + if (w == 0x00A6) { + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ + } if (w <= 0) { w = ((c1 & 0x7f) << 8) | (c & 0x7f); w &= MBFL_WCSPLANE_MASK; @@ -274,6 +280,9 @@ mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) c2 = s1%94 + 0xa1; s1 = (c1 << 8) | c2; } + if (s1 == 0xa2f1) { + s1 = 0x2d62; /* NUMERO SIGN */ + } if (s1 <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; if (c1 == MBFL_WCSPLANE_WINCP932) { @@ -311,6 +320,8 @@ 
mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter) s1 = 0x2172; } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ s1 = 0x224c; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; } else { s1 = -1; c1 = 0; diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c new file mode 100644 index 0000000000..df961677d1 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.c @@ -0,0 +1,522 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi on 4 dec 2002. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_iso2022_jp_ms.h" + +#include "unicode_table_cp932_ext.h" +#include "unicode_table_jis.h" +#include "cp932_table.h" + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_2022jpms_aliases[] = {"ISO2022JPMS", NULL}; /* NULL-terminated list of alias names */ + +const mbfl_encoding mbfl_encoding_2022jpms = { /* ISO-2022-JP-MS encoding descriptor */ + mbfl_no_encoding_2022jpms, + "ISO-2022-JP-MS", + "ISO-2022-JP", /* NOTE(review): differs from the string above; the definition this patch removes from mbfilter_jis.c used ISO-2022-JP-MS for both -- confirm this is intended */ + (const char *(*)[])&mbfl_encoding_2022jpms_aliases, + NULL, + MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE +}; + +const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { /* identify vtable: shared ctor/dtor plus the checker defined at the end of this file */ + mbfl_no_encoding_2022jpms, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_2022jpms +}; + +const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { /* ISO-2022-JP-MS -> wchar conversion vtable */ + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_2022jpms_wchar, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_wchar_2022jpms = { /* wchar -> ISO-2022-JP-MS conversion vtable; its flush entry is the 2022jpms-specific one defined below */ + mbfl_no_encoding_wchar, + mbfl_no_encoding_2022jpms, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_2022jpms, + mbfl_filt_conv_any_2022jpms_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* makes the enclosing function return -1 when the wrapped call yields a negative value */ + +#define sjistoidx(c1, c2) \ + (((c1) > 0x9f) \ + ? (((c1) - 0xc1) * 188 + (c2) - (((c2) > 0x7e) ? 0x41 : 0x40)) \ + : (((c1) - 0x81) * 188 + (c2) - (((c2) > 0x7e) ? 
0x41 : 0x40))) /* linear index of a two-byte pair, 188 cells per first byte; first bytes above 0x9f are rebased from 0xc1 instead of 0x81, second bytes above 0x7e from 0x41 instead of 0x40 */ +#define idxtojis1(c) (((c) / 94) + 0x21) /* first byte for a linear index: row of 94 cells, offset 0x21 */ +#define idxtojis2(c) (((c) % 94) + 0x21) /* second byte for a linear index: cell within the row, offset 0x21 */ + +/* + * ISO-2022-JP-MS => wchar: filter->status keeps the escape-parser step in its low nibble and the selected charset in the bits above it (see the commented case labels) + */ +int +mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter) +{ + int c1, s, w; + +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; /* ESC seen: continue in state 2, charset bits untouched */ + } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ + CK((*filter->output_function)(0xff40 + c, filter->data)); + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->cache = c; /* remember the first byte until the second one arrives */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else if (c > 0xa0 && c < 0xe0) { /* GR kana */ + CK((*filter->output_function)(0xfec0 + c, filter->data)); + } else { + w = c & MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + w = 0; + filter->status &= ~0xf; /* back to the first-byte state of the current charset */ + c1 = filter->cache; + if (c > 0x20 && c < 0x7f) { + s = (c1 - 0x21)*94 + c - 0x21; /* linear index of the byte pair, 94 cells per first byte */ + if (filter->status == 0x80) { + if (s <= 137) { /* fixed overrides that take precedence over the table lookups below */ + if (s == 31) { + w = 0xff3c; /* FULLWIDTH REVERSE SOLIDUS */ + } else if (s == 32) { + w = 0xff5e; /* FULLWIDTH TILDE */ + } else if (s == 33) { + w = 0x2225; /* PARALLEL TO */ + } else if (s == 60) { + w = 0xff0d; /* FULLWIDTH HYPHEN-MINUS */ + } else if (s == 80) { + w = 0xffe0; /* FULLWIDTH CENT SIGN */ + } else if (s == 81) { + w = 0xffe1; /* FULLWIDTH POUND SIGN */ + } else if (s == 137) { + w = 0xffe2; /* FULLWIDTH NOT SIGN */ + } + } + if (w == 0) { + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { /* vendor ext1 (13ku) */ + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; + } else if (s >= 0 && s < 
jisx0208_ucs_table_size) { + w = jisx0208_ucs_table[s]; + } else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) { /* vendor ext2 (89ku - 92ku) */ + w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min]; + } else { + w = 0; + } + } + if (w <= 0) { /* nothing mapped: emit the raw byte pair tagged with the JIS0208 plane */ + w = (c1 << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } else { /* status is not 0x80, i.e. the UDC charset selected by ESC $ ( ? */ + if (c1 > 0x20 && c1 < 0x35) { + w = 0xe000 + (c1 - 0x21)*94 + c - 0x21; /* first bytes 0x21-0x34 map linearly onto code points from 0xe000 upward */ + } + if (w <= 0) { + w = (((c1 - 0x21) + 0x7f) << 8) | c; + w &= MBFL_WCSPLANE_MASK; + w |= MBFL_WCSPLANE_JIS0208; + } + CK((*filter->output_function)(w, filter->data)); + } + } else if (c == 0x1b) { + filter->status += 2; /* ESC in place of a second byte: the cached first byte is discarded */ + } else if ((c >= 0 && c < 0x21) || c == 0x7f) { /* CTLs */ + CK((*filter->output_function)(c, filter->data)); + } else { + w = (c1 << 8) | c; + w &= MBFL_WCSGROUP_MASK; + w |= MBFL_WCSGROUP_THROUGH; + CK((*filter->output_function)(w, filter->data)); + } + break; + + /* ESC */ +/* case 0x02: */ +/* case 0x12: */ +/* case 0x22: */ +/* case 0x82: */ +/* case 0xa2: */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); /* not a recognised sequence: pass the held ESC through, then reprocess c in state 0 */ + goto retry; + } + break; + + /* ESC $ */ +/* case 0x03: */ +/* case 0x13: */ +/* case 0x23: */ +/* case 0x83: */ +/* case 0xa3: */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + goto retry; + } + break; + + /* ESC $ ( */ +/* case 0x04: */ +/* case 0x14: */ +/* case 0x24: */ +/* case 0x84: */ +/* case 0xa4: */ + case 4: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' 
 */ + filter->status = 0xa0; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x24, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + /* ESC ( */ +/* case 0x05: */ +/* case 0x15: */ +/* case 0x25: */ +/* case 0x85: */ +/* case 0xa5: */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J': handled exactly like 'B' here, so status 0x10 is never set by this function */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->status &= ~0xf; + CK((*filter->output_function)(0x1b, filter->data)); + CK((*filter->output_function)(0x28, filter->data)); + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} + +static int +cp932ext3_cp932ext2_jis(int c) /* turns an offset into cp932ext3_ucs_table (as passed by the caller below) into a two-byte code by moving it into one of three other index ranges */ +{ + int idx; + + idx = sjistoidx(0xfa, 0x40) + c; + if (idx >= sjistoidx(0xfa, 0x5c)) + idx -= sjistoidx(0xfa, 0x5c) - sjistoidx(0xed, 0x40); + else if (idx >= sjistoidx(0xfa, 0x55)) + idx -= sjistoidx(0xfa, 0x55) - sjistoidx(0xee, 0xfa); + else if (idx >= sjistoidx(0xfa, 0x40)) + idx -= sjistoidx(0xfa, 0x40) - sjistoidx(0xee, 0xef); + return idxtojis1(idx) << 8 | idxtojis2(idx); +} + +/* + * wchar => ISO-2022-JP-MS: filter->status & 0xff00 records the charset last announced (0 ASCII, 0x100 kana, 0x200 X 0208, 0x800 UDC) so an escape sequence is emitted only when it changes + */ +int +mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter) +{ + int c1, c2, s1, s2; + + s1 = 0; + s2 = 0; + if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { + s1 = ucs_a1_jis_table[c - ucs_a1_jis_table_min]; + } else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max) { + s1 = ucs_a2_jis_table[c - ucs_a2_jis_table_min]; + } else if (c >= ucs_i_jis_table_min && c < ucs_i_jis_table_max) { + s1 = ucs_i_jis_table[c - ucs_i_jis_table_min]; + } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { + s1 = ucs_r_jis_table[c - ucs_r_jis_table_min]; + } else if (c >= 0xe000 && c < (0xe000 + 20*94)) { /* user (95ku - 114ku) */ + s1 = c - 0xe000; + c1 = s1/94 + 0x7f; + c2 = s1%94 + 0x21; + s1 = (c1 << 8) | c2; 
+ } + if (s1 <= 0) { /* no table hit: try the plane-tagged values and a few fixed mappings */ + c1 = c & ~MBFL_WCSPLANE_MASK; + if (c1 == MBFL_WCSPLANE_WINCP932) { + s1 = c & MBFL_WCSPLANE_MASK; + s2 = 1; /* exempts this value from the s1 >= 0xa1a1 rejection below */ + } else if (c1 == MBFL_WCSPLANE_JIS0208) { + s1 = c & MBFL_WCSPLANE_MASK; + } else if (c1 == MBFL_WCSPLANE_JIS0212) { + s1 = c & MBFL_WCSPLANE_MASK; + s1 |= 0x8080; + } else if (c == 0xa5) { /* YEN SIGN */ + s1 = 0x216f; /* FULLWIDTH YEN SIGN */ + } else if (c == 0x203e) { /* OVER LINE */ + s1 = 0x2131; /* FULLWIDTH MACRON */ + } else if (c == 0xff3c) { /* FULLWIDTH REVERSE SOLIDUS */ + s1 = 0x2140; + } else if (c == 0xff5e) { /* FULLWIDTH TILDE */ + s1 = 0x2141; + } else if (c == 0x2225) { /* PARALLEL TO */ + s1 = 0x2142; + } else if (c == 0xff0d) { /* FULLWIDTH HYPHEN-MINUS */ + s1 = 0x215d; + } else if (c == 0xffe0) { /* FULLWIDTH CENT SIGN */ + s1 = 0x2171; + } else if (c == 0xffe1) { /* FULLWIDTH POUND SIGN */ + s1 = 0x2172; + } else if (c == 0xffe2) { /* FULLWIDTH NOT SIGN */ + s1 = 0x224c; + } + } + if ((s1 <= 0) || (s1 >= 0xa1a1 && s2 == 0)) { /* not found or X 0212 */ + s1 = -1; + c1 = 0; + c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ + if (c == cp932ext1_ucs_table[c1]) { + s1 = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); + break; + } + c1++; + } + if (s1 <= 0) { + c1 = 0; + c2 = cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; + while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ + if (c == cp932ext3_ucs_table[c1]) { + s1 = cp932ext3_cp932ext2_jis(c1); + break; + } + c1++; + } + } + if (c == 0) { + s1 = 0; + } else if (s1 <= 0) { + s1 = -1; + } + } + if (s1 >= 0) { + if (s1 < 0x80) { /* latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + CK((*filter->output_function)(s1, filter->data)); + filter->status = 0; + } else if (s1 > 0xa0 && s1 < 0xe0) { /* kana */ + if 
((filter->status & 0xff00) != 0x100) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ + } + filter->status = 0x100; + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x7e7f) { /* X 0208 */ + if ((filter->status & 0xff00) != 0x200) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status = 0x200; + CK((*filter->output_function)((s1 >> 8) & 0xff, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } else if (s1 < 0x927f) { /* UDC */ + if ((filter->status & 0xff00) != 0x800) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x24, filter->data)); /* '$' */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x3f, filter->data)); /* '?' 
 */ + } + filter->status = 0x800; + CK((*filter->output_function)(((s1 >> 8) - 0x5e) & 0x7f, filter->data)); + CK((*filter->output_function)(s1 & 0x7f, filter->data)); + } /* NOTE(review): s1 >= 0x927f matches none of the branches above, so nothing is emitted for it -- confirm that is intended */ + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +int +mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter) /* emits ESC ( B when a non-ASCII charset is active, then clears the charset bits of status; returns 0, or -1 via CK */ +{ + /* back to latin */ + if ((filter->status & 0xff00) != 0) { + CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ + CK((*filter->output_function)(0x28, filter->data)); /* '(' */ + CK((*filter->output_function)(0x42, filter->data)); /* 'B' */ + } + filter->status &= 0xff; + return 0; +} + +static int mbfl_filt_ident_2022jpms(int c, mbfl_identify_filter *filter) /* walks the same escape-sequence states as the decoder above, but only sets filter->flag on unexpected input and emits nothing */ +{ +retry: + switch (filter->status & 0xf) { +/* case 0x00: ASCII */ +/* case 0x10: X 0201 latin */ +/* case 0x20: X 0201 kana */ +/* case 0x80: X 0208 */ +/* case 0xa0: X UDC */ + case 0: + if (c == 0x1b) { + filter->status += 2; + } else if ((filter->status == 0x80 || filter->status == 0xa0) && c > 0x20 && c < 0x80) { /* kanji first char */ + filter->status += 1; + } else if (c >= 0 && c < 0x80) { /* latin, CTLs */ + ; + } else { + filter->flag = 1; /* bad */ + } + break; + +/* case 0x81: X 0208 second char */ +/* case 0xa1: UDC second char */ + case 1: + filter->status &= ~0xf; + if (c == 0x1b) { + goto retry; + } else if (c < 0x21 || c > 0x7e) { /* bad */ + filter->flag = 1; + } + break; + + /* ESC */ + case 2: + if (c == 0x24) { /* '$' */ + filter->status++; + } else if (c == 0x28) { /* '(' */ + filter->status += 3; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ */ + case 3: + if (c == 0x40 || c == 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x28) { /* '(' */ + filter->status++; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC $ ( */ + case 4: + if (c == 0x40 || c 
== 0x42) { /* '@' or 'B' */ + filter->status = 0x80; + } else if (c == 0x3f) { /* '?' */ + filter->status = 0xa0; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + /* ESC ( */ + case 5: + if (c == 0x42) { /* 'B' */ + filter->status = 0; + } else if (c == 0x4a) { /* 'J' */ + filter->status = 0; + } else if (c == 0x49) { /* 'I' */ + filter->status = 0x20; + } else { + filter->flag = 1; /* bad */ + filter->status &= ~0xf; + goto retry; + } + break; + + default: + filter->status = 0; + break; + } + + return c; +} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h new file mode 100644 index 0000000000..8479a45095 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso2022_jp_ms.h @@ -0,0 +1,44 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this file: + * + */ +/* + * The source code included in this files was separated from mbfilter_ja.c + * by moriyoshi koizumi on 4 dec 2002. 
+ * + */ + +#ifndef MBFL_MBFILTER_ISO2022_JP_MS_H +#define MBFL_MBFILTER_ISO2022_JP_MS_H + +#include "mbfilter.h" /* presumably declares the mbfl_* types used below; confirm against mbfilter.h */ + +extern const mbfl_encoding mbfl_encoding_2022jpms; /* ISO-2022-JP-MS encoding descriptor */ +extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; /* identify-filter vtable */ +extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; /* convert-filter vtable, ISO-2022-JP-MS -> wchar */ +extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; /* convert-filter vtable, wchar -> ISO-2022-JP-MS */ + +int mbfl_filt_conv_2022jpms_wchar(int c, mbfl_convert_filter *filter); /* per-character callback, ISO-2022-JP-MS => wchar */ +int mbfl_filt_conv_wchar_2022jpms(int c, mbfl_convert_filter *filter); /* per-character callback, wchar => ISO-2022-JP-MS */ +int mbfl_filt_conv_any_2022jpms_flush(mbfl_convert_filter *filter); /* flush callback installed in vtbl_wchar_2022jpms */ + +#endif /* MBFL_MBFILTER_ISO2022_JP_MS_H */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.c b/ext/mbstring/libmbfl/filters/mbfilter_jis.c index 3657658ba8..58336d4e28 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.c @@ -58,15 +58,6 @@ const mbfl_encoding mbfl_encoding_2022jp = { MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE }; -const mbfl_encoding mbfl_encoding_2022jpms = { - mbfl_no_encoding_2022jpms, - "ISO-2022-JP-MS", - "ISO-2022-JP-MS", - NULL, - NULL, - MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_SHFTCODE -}; - const struct mbfl_identify_vtbl vtbl_identify_jis = { mbfl_no_encoding_jis, mbfl_filt_ident_common_ctor, @@ -81,13 +72,6 @@ const struct mbfl_identify_vtbl vtbl_identify_2022jp = { mbfl_filt_ident_2022jp }; -const struct mbfl_identify_vtbl vtbl_identify_2022jpms = { - mbfl_no_encoding_2022jpms, - mbfl_filt_ident_common_ctor, - mbfl_filt_ident_common_dtor, - mbfl_filt_ident_2022jp -}; - const struct mbfl_convert_vtbl vtbl_jis_wchar = { mbfl_no_encoding_jis, mbfl_no_encoding_wchar, @@ -124,24 +108,6 @@ const struct mbfl_convert_vtbl vtbl_wchar_2022jp = { mbfl_filt_conv_any_jis_flush }; -const struct mbfl_convert_vtbl vtbl_2022jpms_wchar = { - mbfl_no_encoding_2022jpms, - mbfl_no_encoding_wchar, - mbfl_filt_conv_common_ctor, - mbfl_filt_conv_common_dtor, - mbfl_filt_conv_jis_wchar, - mbfl_filt_conv_common_flush -}; - -const struct 
mbfl_convert_vtbl vtbl_wchar_2022jpms = { - mbfl_no_encoding_wchar, - mbfl_no_encoding_2022jpms, - mbfl_filt_conv_common_ctor, - mbfl_filt_conv_common_dtor, - mbfl_filt_conv_wchar_jis, - mbfl_filt_conv_any_jis_flush -}; - #define CK(statement) do { if ((statement) < 0) return (-1); } while (0) /* @@ -164,10 +130,12 @@ retry: filter->status += 2; } else if (c == 0x0e) { /* "kana in" */ filter->status = 0x20; - CK((*filter->output_function)(c, filter->data)); } else if (c == 0x0f) { /* "kana out" */ filter->status = 0; - CK((*filter->output_function)(c, filter->data)); + } else if (filter->status == 0x10 && c == 0x5c) { /* YEN SIGN */ + CK((*filter->output_function)(0xa5, filter->data)); + } else if (filter->status == 0x10 && c == 0x7e) { /* OVER LINE */ + CK((*filter->output_function)(0x203e, filter->data)); } else if (filter->status == 0x20 && c > 0x20 && c < 0x60) { /* kana */ CK((*filter->output_function)(0xff40 + c, filter->data)); } else if ((filter->status == 0x80 || filter->status == 0x90) && c > 0x20 && c < 0x7f) { /* kanji first char */ @@ -193,34 +161,9 @@ retry: s = (c1 - 0x21)*94 + c - 0x21; if (filter->status == 0x80) { if (s >= 0 && s < jisx0208_ucs_table_size) { - if ((filter->from)->no_encoding != - mbfl_no_encoding_2022jpms) { - w = jisx0208_ucs_table[s]; - } - else { - if ((c1 - 0x21) == 12) { - w = cp932ext1_ucs_table[s-12*94]; - } - else { - if (c1 >= 0x79 && c1 <= 0x7c) { - w = cp932ext2_ucs_table[s-(0x79-0x21)*94]; - } - else { w = jisx0208_ucs_table[s]; - } - } - } } else { - if ((filter->from)->no_encoding != - mbfl_no_encoding_2022jpms) { - w = 0; - } else { - if (c1 >= 0x79 && c1 <= 0x7c) { - w = cp932ext2_ucs_table[s-(0x79-0x21)*94]; - } else { w = 0; - } - } } if (w <= 0) { w = (c1 << 8) | c; @@ -344,7 +287,7 @@ retry: int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) { - int c1, c2, s; + int c1, s; s = 0; if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { @@ -356,18 +299,6 @@ mbfl_filt_conv_wchar_jis(int c, 
mbfl_convert_filter *filter) } else if (c >= ucs_r_jis_table_min && c < ucs_r_jis_table_max) { s = ucs_r_jis_table[c - ucs_r_jis_table_min]; } - if (s > 0x8080 && s < 0x10000 && - ((filter->to)->no_encoding == mbfl_no_encoding_2022jpms)) { - c1 = 0; - c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext2_ucs_table[c1]) { - s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); - break; - } - c1++; - } - } if (s <= 0) { c1 = c & ~MBFL_WCSPLANE_MASK; if (c1 == MBFL_WCSPLANE_JIS0208) { @@ -396,30 +327,8 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) } if (c == 0) { s = 0; - } else if (s <= 0 && ((filter->to)->no_encoding == - mbfl_no_encoding_2022jpms)) { + } else if (s <= 0) { s = -1; - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; - } - c1++; - } - if (s < 0 && ((filter->to)->no_encoding == - mbfl_no_encoding_2022jpms)) { - c1 = 0; - c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext2_ucs_table[c1]) { - s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); - break; - } - c1++; - } - } } } if (s >= 0) { @@ -483,7 +392,7 @@ mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter) int mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) { - int c1, c2, s; + int s; s = 0; if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max) { @@ -519,47 +428,9 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) s = 0; } else if (s <= 0) { s = -1; - if ( (filter->to)->no_encoding == - mbfl_no_encoding_2022jpms) { - c1 = 0; - c2 = cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext1 (13ku) */ - if (c == cp932ext1_ucs_table[c1]) { - s = ((c1/94 + 0x2d) << 8) + (c1%94 + 0x21); - break; - } - c1++; 
- } - if ((filter->to)->no_encoding == - mbfl_no_encoding_2022jpms) { - c1 = 0; - c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext2_ucs_table[c1]) { - s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); - break; - } - c1++; - } - } - } } - } else if (((s >= 0x80 && s < 0x2121) && - (filter->to)->no_encoding != mbfl_no_encoding_2022jpms) || - (s > 0x8080)) { + } else if ((s >= 0x80 && s < 0x2121) || (s > 0x8080)) { s = -1; - if ((filter->to)->no_encoding == - mbfl_no_encoding_2022jpms) { - c1 = 0; - c2 = cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; - while (c1 < c2) { /* CP932 vendor ext3 (115ku - 119ku) */ - if (c == cp932ext2_ucs_table[c1]) { - s = ((c1/94 + 0x79) << 8) +(c1%94 + 0x21); - break; - } - c1++; - } - } } if (s >= 0) { if (s < 0x80) { /* ASCII */ @@ -570,15 +441,6 @@ mbfl_filt_conv_wchar_2022jp(int c, mbfl_convert_filter *filter) } filter->status = 0; CK((*filter->output_function)(s, filter->data)); - } else if (s < 0x100 && ((filter->to)->no_encoding == - mbfl_no_encoding_2022jpms)) { /* kana */ - if ((filter->status & 0xff00) != 0x100) { - CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ - CK((*filter->output_function)(0x28, filter->data)); /* '(' */ - CK((*filter->output_function)(0x49, filter->data)); /* 'I' */ - } - filter->status = 0x100; - CK((*filter->output_function)(s & 0x7f, filter->data)); } else if (s < 0x10000) { /* X 0208 */ if ((filter->status & 0xff00) != 0x200) { CK((*filter->output_function)(0x1b, filter->data)); /* ESC */ diff --git a/ext/mbstring/libmbfl/filters/mbfilter_jis.h b/ext/mbstring/libmbfl/filters/mbfilter_jis.h index 1ba244a282..0375307263 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_jis.h +++ b/ext/mbstring/libmbfl/filters/mbfilter_jis.h @@ -34,16 +34,12 @@ extern const mbfl_encoding mbfl_encoding_jis; extern const mbfl_encoding mbfl_encoding_2022jp; -extern const mbfl_encoding mbfl_encoding_2022jpms; extern const 
struct mbfl_identify_vtbl vtbl_identify_2022jp; -extern const struct mbfl_identify_vtbl vtbl_identify_2022jpms; extern const struct mbfl_identify_vtbl vtbl_identify_jis; extern const struct mbfl_convert_vtbl vtbl_jis_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_jis; extern const struct mbfl_convert_vtbl vtbl_2022jp_wchar; extern const struct mbfl_convert_vtbl vtbl_wchar_2022jp; -extern const struct mbfl_convert_vtbl vtbl_2022jpms_wchar; -extern const struct mbfl_convert_vtbl vtbl_wchar_2022jpms; int mbfl_filt_conv_jis_wchar(int c, mbfl_convert_filter *filter); int mbfl_filt_conv_wchar_jis(int c, mbfl_convert_filter *filter); diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c new file mode 100644 index 0000000000..9b8f450e9e --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.c @@ -0,0 +1,146 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko + * + * Based on mbfilter_koi8r.c code + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_koi8u.h" +#include "unicode_table_koi8u.h" + +static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_koi8u_aliases[] = {"KOI8-U", "KOI8U", NULL}; + +const mbfl_encoding mbfl_encoding_koi8u = { + mbfl_no_encoding_koi8u, + "KOI8-U", + "KOI8-U", + (const char *(*)[])&mbfl_encoding_koi8u_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_koi8u = { + mbfl_no_encoding_koi8u, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_koi8u +}; + +const struct mbfl_convert_vtbl vtbl_wchar_koi8u = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_koi8u, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_koi8u, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_koi8u_wchar = { + mbfl_no_encoding_koi8u, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_koi8u_wchar, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * koi8u => wchar + */ +int +mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < koi8u_ucs_table_min) { + s = c; + } else if (c >= koi8u_ucs_table_min && c < 0x100) { + s = koi8u_ucs_table[c - koi8u_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_KOI8U; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + 
CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* + * wchar => koi8u + */ +int +mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = koi8u_ucs_table_len-1; + while (n >= 0) { + if (c == koi8u_ucs_table[n]) { + s = koi8u_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_KOI8U) { + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_koi8u(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} diff --git a/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h new file mode 100644 index 0000000000..693ade3d6d --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_koi8u.h @@ -0,0 +1,47 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko + * + * Based on mbfilter_koi8r.h code + * + */ + +#ifndef MBFL_MBFILTER_KOI8U_H +#define MBFL_MBFILTER_KOI8U_H + +#include "mbfilter.h" + +extern const mbfl_encoding mbfl_encoding_koi8u; +extern const struct mbfl_identify_vtbl vtbl_identify_koi8u; +extern const struct mbfl_convert_vtbl vtbl_wchar_koi8u; +extern const struct mbfl_convert_vtbl vtbl_koi8u_wchar; + +int mbfl_filt_conv_koi8u_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_koi8u(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_KOI8U_H */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h new file mode 100644 index 0000000000..644053cf7a --- /dev/null +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp1254.h @@ -0,0 +1,51 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The authors of this file: PHP3 internationalization team + * You can contact the primary author 金本 茂 . + * + */ + +#ifndef UNICODE_TABLE_CP1254_H + +/* cp1254 to Unicode table */ +static const unsigned short cp1254_ucs_table[] = { + 0x20ac, 0xfffe, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, + 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0xfffe, 0xfffe, 0xfffe, + 0xfffe, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, + 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0xfffe, 0xfffe, 0x0178, + 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, + 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, + 0x011e, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, + 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x0130, 0x015e, 0x00df, + 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, + 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, + 0x011f, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x0131, 0x015f, 0x00ff +}; +static const int cp1254_ucs_table_min = 0x80; +static const int cp1254_ucs_table_len = (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); +static const int cp1254_ucs_table_max = 0x80 + (sizeof (cp1254_ucs_table) / sizeof (unsigned short)); + +#endif /* UNICODE_TABLE_CP1254_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp850.h b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h new file mode 100644 index 
0000000000..6c60ae03e0 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h @@ -0,0 +1,52 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa + * Adaption for CP850: D. 
Giffeler + * + */ + +#ifndef UNICODE_TABLE_CP850_H +#define UNICODE_TABLE_CP850_H + +/* cp850_DOS to Unicode table */ +static const unsigned short cp850_ucs_table[] = { + 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7 +, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5 +, 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9 +, 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192 +, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba +, 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb +, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0 +, 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510 +, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3 +, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4 +, 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce +, 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580 +, 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe +, 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4 +, 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8 +, 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0 +}; +static const int cp850_ucs_table_min = 0x80; +static const int cp850_ucs_table_len = (sizeof (cp850_ucs_table) / sizeof (unsigned short)); +static const int cp850_ucs_table_max = 0x80 + (sizeof (cp850_ucs_table) / sizeof (unsigned short)); + +#endif /* UNICODE_TABLE_CP850_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_jis.h b/ext/mbstring/libmbfl/filters/unicode_table_jis.h index e87dad93c8..5671c4e851 100644 --- a/ext/mbstring/libmbfl/filters/unicode_table_jis.h +++ b/ext/mbstring/libmbfl/filters/unicode_table_jis.h @@ -36,13 +36,13 @@ static const unsigned short jisx0208_ucs_table[] = { 0xFF1F,0xFF01,0x309B,0x309C,0x00B4,0xFF40,0x00A8,0xFF3E, 0xFFE3,0xFF3F,0x30FD,0x30FE,0x309D,0x309E,0x3003,0x4EDD, 
0x3005,0x3006,0x3007,0x30FC,0x2015,0x2010,0xFF0F,0xFF3C, - 0xFF5E,0x2225,0xFF5C,0x2026,0x2025,0x2018,0x2019,0x201C, + 0x301C,0x2016,0xFF5C,0x2026,0x2025,0x2018,0x2019,0x201C, 0x201D,0xFF08,0xFF09,0x3014,0x3015,0xFF3B,0xFF3D,0xFF5B, 0xFF5D,0x3008,0x3009,0x300A,0x300B,0x300C,0x300D,0x300E, - 0x300F,0x3010,0x3011,0xFF0B,0xFF0D,0x00B1,0x00D7,0x00F7, + 0x300F,0x3010,0x3011,0xFF0B,0x2212,0x00B1,0x00D7,0x00F7, 0xFF1D,0x2260,0xFF1C,0xFF1E,0x2266,0x2267,0x221E,0x2234, 0x2642,0x2640,0x00B0,0x2032,0x2033,0x2103,0xFFE5,0xFF04, - 0xFFE0,0xFFE1,0xFF05,0xFF03,0xFF06,0xFF0A,0xFF20,0x00A7, + 0x00A2,0x00A3,0xFF05,0xFF03,0xFF06,0xFF0A,0xFF20,0x00A7, 0x2606,0x2605,0x25CB,0x25CF,0x25CE,0x25C7, /* ku 2 */ @@ -51,7 +51,7 @@ static const unsigned short jisx0208_ucs_table[] = { 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x2208,0x220B,0x2286,0x2287,0x2282,0x2283,0x222A, 0x2229,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x2227,0x2228,0xFFE2,0x21D2,0x21D4,0x2200,0x2203, + 0x0000,0x2227,0x2228,0x00AC,0x21D2,0x21D4,0x2200,0x2203, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x2220,0x22A5,0x2312,0x2202,0x2207, 0x2261,0x2252,0x226A,0x226B,0x221A,0x223D,0x221D,0x2235, @@ -1217,9 +1217,9 @@ static const unsigned short jisx0212_ucs_table[] = { /* ku 2 */ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x02D8,0x02C7, - 0x00B8,0x02D9,0x02DD,0x00AF,0x02DB,0x02DA,0xFF5E,0x0384, + 0x00B8,0x02D9,0x02DD,0x00AF,0x02DB,0x02DA,0x007E,0x0384, 0x0385,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x00A1,0xFFE4,0x00BF,0x0000,0x0000,0x0000,0x0000, + 0x0000,0x00A1,0x00A6,0x00BF,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, @@ -2471,7 +2471,7 @@ static const unsigned short ucs_a2_jis_table[] = { /* 2100h */ 
0x0000,0x0000,0x0000,0x216E,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x2d62,0x0000, + 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0xA2F1,0x0000, 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0xA2EF,0x0000,0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x2272,0x0000,0x0000,0x0000,0x0000, diff --git a/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h b/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h new file mode 100644 index 0000000000..f3a4e9de63 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/unicode_table_koi8u.h @@ -0,0 +1,166 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Maksym Veremeyenko + * + */ + +#ifndef UNICODE_TABLE_KOI8U_H +#define UNICODE_TABLE_KOI8U_H + +/* KOI8-U (RFC2319) to Unicode */ +static const unsigned short koi8u_ucs_table[] = { + 0x2500, /* BOX DRAWINGS LIGHT HORIZONTAL */ + 0x2502, /* BOX DRAWINGS LIGHT VERTICAL */ + 0x250C, /* BOX DRAWINGS LIGHT DOWN AND RIGHT */ + 0x2510, /* BOX DRAWINGS LIGHT DOWN AND LEFT */ + 0x2514, /* BOX DRAWINGS LIGHT UP AND RIGHT */ + 0x2518, /* BOX DRAWINGS LIGHT UP AND LEFT */ + 0x251C, /* BOX DRAWINGS LIGHT VERTICAL AND RIGHT */ + 0x2524, /* BOX DRAWINGS LIGHT VERTICAL AND LEFT */ + 0x252C, /* BOX DRAWINGS LIGHT DOWN AND HORIZONTAL */ + 0x2534, /* BOX DRAWINGS LIGHT UP AND HORIZONTAL */ + 0x253C, /* BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL */ + 0x2580, /* UPPER HALF BLOCK */ + 0x2584, /* LOWER HALF BLOCK */ + 0x2588, /* FULL BLOCK */ + 0x258C, /* LEFT HALF BLOCK */ + 0x2590, /* RIGHT HALF BLOCK */ + 0x2591, /* LIGHT SHADE */ + 0x2592, /* MEDIUM SHADE */ + 0x2593, /* DARK SHADE */ + 0x2320, /* TOP HALF INTEGRAL */ + 0x25A0, /* BLACK SQUARE */ + 0x2219, /* BULLET OPERATOR */ + 0x221A, /* SQUARE ROOT */ + 0x2248, /* ALMOST EQUAL TO */ + 0x2264, /* LESS THAN OR EQUAL TO */ + 0x2265, /* GREATER THAN OR EQUAL TO */ + 0x00A0, /* NO-BREAK SPACE */ + 0x2321, /* BOTTOM HALF INTEGRAL */ + 0x00B0, /* DEGREE SIGN */ + 0x00B2, /* SUPERSCRIPT TWO */ + 0x00B7, /* MIDDLE DOT */ + 0x00F7, /* DIVISION SIGN */ + 0x2550, /* BOX DRAWINGS DOUBLE HORIZONTAL */ + 0x2551, /* BOX DRAWINGS DOUBLE VERTICAL */ + 0x2552, /* BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE */ + 0x0451, /* CYRILLIC SMALL LETTER IO */ + 0x0454, /* CYRILLIC SMALL LETTER UKRAINIAN IE */ + 0x2554, /* BOX DRAWINGS DOUBLE DOWN AND RIGHT */ + 0x0456, /* 
CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I */ + 0x0457, /* CYRILLIC SMALL LETTER YI (UKRAINIAN) */ + 0x2557, /* BOX DRAWINGS DOUBLE DOWN AND LEFT */ + 0x2558, /* BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE */ + 0x2559, /* BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE */ + 0x255A, /* BOX DRAWINGS DOUBLE UP AND RIGHT */ + 0x255B, /* BOX DRAWINGS UP SINGLE AND LEFT DOUBLE */ + 0x0491, /* CYRILLIC SMALL LETTER GHE WITH UPTURN */ + 0x255D, /* BOX DRAWINGS DOUBLE UP AND LEFT */ + 0x255E, /* BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE */ + 0x255F, /* BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE */ + 0x2560, /* BOX DRAWINGS DOUBLE VERTICAL AND RIGHT */ + 0x2561, /* BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE */ + 0x0401, /* CYRILLIC CAPITAL LETTER IO */ + 0x0404, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */ + 0x2563, /* BOX DRAWINGS DOUBLE VERTICAL AND LEFT */ + 0x0406, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */ + 0x0407, /* CYRILLIC CAPITAL LETTER YI (UKRAINIAN) */ + 0x2566, /* BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL */ + 0x2567, /* BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE */ + 0x2568, /* BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE */ + 0x2569, /* BOX DRAWINGS DOUBLE UP AND HORIZONTAL */ + 0x256A, /* BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE */ + 0x0490, /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */ + 0x256C, /* BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL */ + 0x00A9, /* COPYRIGHT SIGN */ + 0x044E, /* CYRILLIC SMALL LETTER YU */ + 0x0430, /* CYRILLIC SMALL LETTER A */ + 0x0431, /* CYRILLIC SMALL LETTER BE */ + 0x0446, /* CYRILLIC SMALL LETTER TSE */ + 0x0434, /* CYRILLIC SMALL LETTER DE */ + 0x0435, /* CYRILLIC SMALL LETTER IE */ + 0x0444, /* CYRILLIC SMALL LETTER EF */ + 0x0433, /* CYRILLIC SMALL LETTER GHE */ + 0x0445, /* CYRILLIC SMALL LETTER KHA */ + 0x0438, /* CYRILLIC SMALL LETTER I */ + 0x0439, /* CYRILLIC SMALL LETTER SHORT I */ + 0x043A, /* CYRILLIC SMALL LETTER KA */ + 0x043B, /* CYRILLIC SMALL LETTER EL */ + 0x043C, /* CYRILLIC SMALL 
LETTER EM */ + 0x043D, /* CYRILLIC SMALL LETTER EN */ + 0x043E, /* CYRILLIC SMALL LETTER O */ + 0x043F, /* CYRILLIC SMALL LETTER PE */ + 0x044F, /* CYRILLIC SMALL LETTER YA */ + 0x0440, /* CYRILLIC SMALL LETTER ER */ + 0x0441, /* CYRILLIC SMALL LETTER ES */ + 0x0442, /* CYRILLIC SMALL LETTER TE */ + 0x0443, /* CYRILLIC SMALL LETTER U */ + 0x0436, /* CYRILLIC SMALL LETTER ZHE */ + 0x0432, /* CYRILLIC SMALL LETTER VE */ + 0x044C, /* CYRILLIC SMALL LETTER SOFT SIGN */ + 0x044B, /* CYRILLIC SMALL LETTER YERU */ + 0x0437, /* CYRILLIC SMALL LETTER ZE */ + 0x0448, /* CYRILLIC SMALL LETTER SHA */ + 0x044D, /* CYRILLIC SMALL LETTER E */ + 0x0449, /* CYRILLIC SMALL LETTER SHCHA */ + 0x0447, /* CYRILLIC SMALL LETTER CHE */ + 0x044A, /* CYRILLIC SMALL LETTER HARD SIGN */ + 0x042E, /* CYRILLIC CAPITAL LETTER YU */ + 0x0410, /* CYRILLIC CAPITAL LETTER A */ + 0x0411, /* CYRILLIC CAPITAL LETTER BE */ + 0x0426, /* CYRILLIC CAPITAL LETTER TSE */ + 0x0414, /* CYRILLIC CAPITAL LETTER DE */ + 0x0415, /* CYRILLIC CAPITAL LETTER IE */ + 0x0424, /* CYRILLIC CAPITAL LETTER EF */ + 0x0413, /* CYRILLIC CAPITAL LETTER GHE */ + 0x0425, /* CYRILLIC CAPITAL LETTER KHA */ + 0x0418, /* CYRILLIC CAPITAL LETTER I */ + 0x0419, /* CYRILLIC CAPITAL LETTER SHORT I */ + 0x041A, /* CYRILLIC CAPITAL LETTER KA */ + 0x041B, /* CYRILLIC CAPITAL LETTER EL */ + 0x041C, /* CYRILLIC CAPITAL LETTER EM */ + 0x041D, /* CYRILLIC CAPITAL LETTER EN */ + 0x041E, /* CYRILLIC CAPITAL LETTER O */ + 0x041F, /* CYRILLIC CAPITAL LETTER PE */ + 0x042F, /* CYRILLIC CAPITAL LETTER YA */ + 0x0420, /* CYRILLIC CAPITAL LETTER ER */ + 0x0421, /* CYRILLIC CAPITAL LETTER ES */ + 0x0422, /* CYRILLIC CAPITAL LETTER TE */ + 0x0423, /* CYRILLIC CAPITAL LETTER U */ + 0x0416, /* CYRILLIC CAPITAL LETTER ZHE */ + 0x0412, /* CYRILLIC CAPITAL LETTER VE */ + 0x042C, /* CYRILLIC CAPITAL LETTER SOFT SIGN */ + 0x042B, /* CYRILLIC CAPITAL LETTER YERU */ + 0x0417, /* CYRILLIC CAPITAL LETTER ZE */ + 0x0428, /* CYRILLIC CAPITAL LETTER SHA */ + 0x042D, 
/* CYRILLIC CAPITAL LETTER E */ + 0x0429, /* CYRILLIC CAPITAL LETTER SHCHA */ + 0x0427, /* CYRILLIC CAPITAL LETTER CHE */ + 0x042A /* CYRILLIC CAPITAL LETTER HARD SIGN */ +}; +static const int koi8u_ucs_table_min = 0x80; +static const int koi8u_ucs_table_len = (sizeof (koi8u_ucs_table) / sizeof (unsigned short)); +static const int koi8u_ucs_table_max = 0x80 + (sizeof (koi8u_ucs_table) / sizeof (unsigned short)); + + + +#endif /* UNICODE_TABLE_KOI8U_H */ + diff --git a/ext/mbstring/libmbfl/libmbfl.dsp b/ext/mbstring/libmbfl/libmbfl.dsp index ddb4f90032..a6d9cfc720 100644 --- a/ext/mbstring/libmbfl/libmbfl.dsp +++ b/ext/mbstring/libmbfl/libmbfl.dsp @@ -243,6 +243,10 @@ SOURCE=.\filters\mbfilter_koi8r.c # End Source File # Begin Source File +SOURCE=.\filters\mbfilter_koi8u.c +# End Source File +# Begin Source File + SOURCE=.\filters\mbfilter_armscii8.c # End Source File # Begin Source File @@ -556,6 +560,10 @@ SOURCE=.\filters\mbfilter_koi8r.h # End Source File # Begin Source File +SOURCE=.\filters\mbfilter_koi8u.h +# End Source File +# Begin Source File + SOURCE=.\filters\mbfilter_armscii8.h # End Source File # Begin Source File @@ -776,6 +784,10 @@ SOURCE=.\filters\unicode_table_koi8r.h # End Source File # Begin Source File +SOURCE=.\filters\unicode_table_koi8u.h +# End Source File +# Begin Source File + SOURCE=.\filters\unicode_table_armscii8.h # End Source File # Begin Source File diff --git a/ext/mbstring/libmbfl/libmbfl.sln b/ext/mbstring/libmbfl/libmbfl.sln index f49f0c0d86..becef513a5 100755 --- a/ext/mbstring/libmbfl/libmbfl.sln +++ b/ext/mbstring/libmbfl/libmbfl.sln @@ -1,21 +1,19 @@ -Microsoft Visual Studio Solution File, Format Version 7.00 +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual C++ Express 2008 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libmbfl", "libmbfl.vcproj", "{B3636594-A785-4270-A765-8EAE922B5207}" EndProject Global - GlobalSection(SolutionConfiguration) = preSolution - ConfigName.0 = Debug - ConfigName.1 = 
Release + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 EndGlobalSection - GlobalSection(ProjectDependencies) = postSolution + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.ActiveCfg = Debug|Win32 + {B3636594-A785-4270-A765-8EAE922B5207}.Debug|Win32.Build.0 = Debug|Win32 + {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.ActiveCfg = Release|Win32 + {B3636594-A785-4270-A765-8EAE922B5207}.Release|Win32.Build.0 = Release|Win32 EndGlobalSection - GlobalSection(ProjectConfiguration) = postSolution - {B3636594-A785-4270-A765-8EAE922B5207}.Debug.ActiveCfg = Debug|Win32 - {B3636594-A785-4270-A765-8EAE922B5207}.Debug.Build.0 = Debug|Win32 - {B3636594-A785-4270-A765-8EAE922B5207}.Release.ActiveCfg = Release|Win32 - {B3636594-A785-4270-A765-8EAE922B5207}.Release.Build.0 = Release|Win32 - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - EndGlobalSection - GlobalSection(ExtensibilityAddIns) = postSolution + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE EndGlobalSection EndGlobal diff --git a/ext/mbstring/libmbfl/libmbfl.vcproj b/ext/mbstring/libmbfl/libmbfl.vcproj index 29e0af0a27..0111012d65 100755 --- a/ext/mbstring/libmbfl/libmbfl.vcproj +++ b/ext/mbstring/libmbfl/libmbfl.vcproj @@ -1,24 +1,50 @@ - + + RootNamespace="libmbfl" + TargetFrameworkVersion="131072" + > + Name="Win32" + /> + + + ATLMinimizesCRunTimeLibraryUsage="false" + CharacterSet="2" + > + + + + + + CompileAs="0" + /> + Name="VCManagedResourceCompilerTool" + /> + + + RandomizedBaseAddress="1" + DataExecutionPrevention="0" + ImportLibrary=".\Debug/mbfl.lib" + /> + Name="VCALinkTool" + /> + Name="VCManifestTool" + /> + Name="VCXDCMakeTool" + /> + Name="VCBscMakeTool" + /> + Name="VCFxCopTool" + /> + Name="VCAppVerifierTool" + /> + Name="VCPostBuildEventTool" + /> + ATLMinimizesCRunTimeLibraryUsage="false" + 
CharacterSet="2" + > + + + + + + SuppressStartupBanner="true" + CompileAs="0" + /> + + Name="VCResourceCompilerTool" + PreprocessorDefinitions="NDEBUG" + Culture="1033" + /> + + RandomizedBaseAddress="1" + DataExecutionPrevention="0" + ImportLibrary=".\Release/mbfl.lib" + /> + Name="VCALinkTool" + /> + Name="VCManifestTool" + /> + Name="VCXDCMakeTool" + /> + Name="VCBscMakeTool" + /> + Name="VCFxCopTool" + /> + Name="VCAppVerifierTool" + /> + Name="VCPostBuildEventTool" + /> + + + Filter="vc6" + > + + + + + RelativePath=".\filters\mbfilter_7bit.c" + > + RelativePath=".\mbfl\mbfilter_8bit.c" + > + RelativePath=".\filters\mbfilter_ascii.c" + > + RelativePath=".\filters\mbfilter_base64.c" + > + RelativePath=".\filters\mbfilter_big5.c" + > + RelativePath=".\filters\mbfilter_byte2.c" + > + RelativePath=".\filters\mbfilter_byte4.c" + > + RelativePath=".\filters\mbfilter_cp1251.c" + > + RelativePath=".\filters\mbfilter_cp1252.c" + > + RelativePath=".\filters\mbfilter_cp1254.c" + > + RelativePath=".\filters\mbfilter_cp866.c" + > + RelativePath=".\filters\mbfilter_cp932.c" + > + RelativePath=".\filters\mbfilter_cp936.c" + > + RelativePath=".\filters\mbfilter_euc_cn.c" + > + RelativePath=".\filters\mbfilter_euc_jp.c" + > + RelativePath=".\filters\mbfilter_euc_jp_win.c" + > + RelativePath=".\filters\mbfilter_euc_kr.c" + > + RelativePath=".\filters\mbfilter_euc_tw.c" + > + RelativePath=".\filters\mbfilter_htmlent.c" + > + RelativePath=".\filters\mbfilter_hz.c" + > + RelativePath=".\filters\mbfilter_iso2022_kr.c" + > + RelativePath=".\filters\mbfilter_iso8859_1.c" + > + RelativePath=".\filters\mbfilter_iso8859_10.c" + > + RelativePath=".\filters\mbfilter_iso8859_13.c" + > + RelativePath=".\filters\mbfilter_iso8859_14.c" + > + RelativePath=".\filters\mbfilter_iso8859_15.c" + > + RelativePath=".\filters\mbfilter_iso8859_16.c" + > + RelativePath=".\filters\mbfilter_iso8859_2.c" + > + RelativePath=".\filters\mbfilter_iso8859_3.c" + > + RelativePath=".\filters\mbfilter_iso8859_4.c" 
+ > + RelativePath=".\filters\mbfilter_iso8859_5.c" + > + RelativePath=".\filters\mbfilter_iso8859_6.c" + > + RelativePath=".\filters\mbfilter_iso8859_7.c" + > + RelativePath=".\filters\mbfilter_iso8859_8.c" + > + RelativePath=".\filters\mbfilter_iso8859_9.c" + > + RelativePath=".\filters\mbfilter_jis.c" + > + RelativePath=".\filters\mbfilter_koi8r.c" + > + RelativePath=".\filters\mbfilter_koi8u.c" + > + RelativePath=".\mbfl\mbfilter_pass.c" + > + RelativePath=".\filters\mbfilter_qprint.c" + > + RelativePath=".\filters\mbfilter_sjis.c" + > + RelativePath=".\filters\mbfilter_ucs2.c" + > + RelativePath=".\filters\mbfilter_ucs4.c" + > + RelativePath=".\filters\mbfilter_uhc.c" + > + RelativePath=".\filters\mbfilter_utf16.c" + > + RelativePath=".\filters\mbfilter_utf32.c" + > + RelativePath=".\filters\mbfilter_utf7.c" + > + RelativePath=".\filters\mbfilter_utf7imap.c" + > + RelativePath=".\filters\mbfilter_utf8.c" + > + RelativePath=".\filters\mbfilter_uuencode.c" + > + RelativePath=".\mbfl\mbfilter_wchar.c" + > + RelativePath=".\mbfl\mbfl_allocators.c" + > + RelativePath=".\mbfl\mbfl_convert.c" + > + RelativePath=".\mbfl\mbfl_encoding.c" + > + RelativePath=".\mbfl\mbfl_filter_output.c" + > + RelativePath=".\mbfl\mbfl_ident.c" + > + RelativePath=".\mbfl\mbfl_language.c" + > + RelativePath=".\mbfl\mbfl_memory_device.c" + > + RelativePath=".\mbfl\mbfl_string.c" + > + RelativePath=".\nls\nls_de.c" + > + RelativePath=".\nls\nls_en.c" + > + RelativePath=".\nls\nls_ja.c" + > + RelativePath=".\nls\nls_kr.c" + > + RelativePath=".\nls\nls_neutral.c" + > + RelativePath=".\nls\nls_ru.c" + > + RelativePath=".\nls\nls_uni.c" + > + RelativePath=".\nls\nls_zh.c" + > + Filter="h;hpp;hxx;hm;inl" + > + RelativePath=".\config.h.vc6" + > + Name="Debug|Win32" + > + CommandLine="copy $(InputDir)\config.h.vc6 "$(InputDir)\config.h" " + Outputs="$(InputDir)\config.h" + /> + Name="Release|Win32" + > + CommandLine="copy $(InputDir)\config.h.vc6 "$(InputDir)\config.h" " + 
Outputs="$(InputDir)\config.h" + /> + RelativePath=".\filters\cp932_table.h" + > + + + + + + + + + + + + + + + + + + + + + + + + + RelativePath=".\filters\mbfilter_cp1254.h" + > + RelativePath=".\filters\mbfilter_cp5022x.h" + > + RelativePath=".\filters\mbfilter_cp51932.h" + > + RelativePath=".\filters\mbfilter_cp866.h" + > + RelativePath=".\filters\mbfilter_cp932.h" + > + RelativePath=".\filters\mbfilter_cp936.h" + > + RelativePath=".\filters\mbfilter_euc_cn.h" + > + RelativePath=".\filters\mbfilter_euc_jp.h" + > + RelativePath=".\filters\mbfilter_euc_jp_win.h" + > + RelativePath=".\filters\mbfilter_euc_kr.h" + > + RelativePath=".\filters\mbfilter_euc_tw.h" + > + RelativePath=".\filters\mbfilter_htmlent.h" + > + RelativePath=".\filters\mbfilter_hz.h" + > + RelativePath=".\filters\mbfilter_iso2022_kr.h" + > + RelativePath=".\filters\mbfilter_iso8859_1.h" + > + RelativePath=".\filters\mbfilter_iso8859_10.h" + > + RelativePath=".\filters\mbfilter_iso8859_13.h" + > + RelativePath=".\filters\mbfilter_iso8859_14.h" + > + RelativePath=".\filters\mbfilter_iso8859_15.h" + > + RelativePath=".\filters\mbfilter_iso8859_16.h" + > + RelativePath=".\filters\mbfilter_iso8859_2.h" + > + RelativePath=".\filters\mbfilter_iso8859_3.h" + > + RelativePath=".\filters\mbfilter_iso8859_4.h" + > + RelativePath=".\filters\mbfilter_iso8859_5.h" + > + RelativePath=".\filters\mbfilter_iso8859_6.h" + > + RelativePath=".\filters\mbfilter_iso8859_7.h" + > + RelativePath=".\filters\mbfilter_iso8859_8.h" + > + RelativePath=".\filters\mbfilter_iso8859_9.h" + > + RelativePath=".\filters\mbfilter_jis.h" + > + RelativePath=".\filters\mbfilter_koi8r.h" + > + RelativePath=".\filters\mbfilter_koi8u.h" + > + RelativePath=".\mbfl\mbfilter_pass.h" + > + RelativePath=".\filters\mbfilter_qprint.h" + > + RelativePath=".\filters\mbfilter_sjis.h" + > + RelativePath=".\filters\mbfilter_tl_jisx0201_jisx0208.h" + > + RelativePath=".\filters\mbfilter_ucs2.h" + > + RelativePath=".\filters\mbfilter_ucs4.h" + > + 
RelativePath=".\filters\mbfilter_uhc.h" + > + RelativePath=".\filters\mbfilter_utf16.h" + > + RelativePath=".\filters\mbfilter_utf32.h" + > + RelativePath=".\filters\mbfilter_utf7.h" + > + RelativePath=".\filters\mbfilter_utf7imap.h" + > + RelativePath=".\filters\mbfilter_utf8.h" + > + RelativePath=".\filters\mbfilter_uuencode.h" + > + RelativePath=".\mbfl\mbfilter_wchar.h" + > + RelativePath=".\mbfl\mbfl_allocators.h" + > + RelativePath=".\mbfl\mbfl_consts.h" + > + RelativePath=".\mbfl\mbfl_convert.h" + > + RelativePath=".\mbfl\mbfl_encoding.h" + > + RelativePath=".\mbfl\mbfl_filter_output.h" + > + RelativePath=".\mbfl\mbfl_ident.h" + > + RelativePath=".\mbfl\mbfl_language.h" + > + RelativePath=".\mbfl\mbfl_memory_device.h" + > + RelativePath=".\mbfl\mbfl_string.h" + > + RelativePath=".\nls\nls_de.h" + > + RelativePath=".\nls\nls_en.h" + > + RelativePath=".\nls\nls_hy.h" + > + RelativePath=".\nls\nls_ja.h" + > + RelativePath=".\nls\nls_kr.h" + > + RelativePath=".\nls\nls_neutral.h" + > + RelativePath=".\nls\nls_ru.h" + > + RelativePath=".\nls\nls_tr.h" + > + RelativePath=".\nls\nls_ua.h" + > + RelativePath=".\nls\nls_uni.h" + > + RelativePath=".\nls\nls_zh.h" + > + RelativePath=".\filters\unicode_prop.h" + > + RelativePath=".\filters\unicode_table_armscii8.h" + > + RelativePath=".\filters\unicode_table_big5.h" + > + RelativePath=".\filters\unicode_table_cns11643.h" + > + RelativePath=".\filters\unicode_table_cp1251.h" + > + RelativePath=".\filters\unicode_table_cp1252.h" + > + RelativePath=".\filters\unicode_table_cp1254.h" + > + RelativePath=".\filters\unicode_table_cp866.h" + > + RelativePath=".\filters\unicode_table_cp932_ext.h" + > + RelativePath=".\filters\unicode_table_cp936.h" + > + RelativePath=".\filters\unicode_table_iso8859_10.h" + > + RelativePath=".\filters\unicode_table_iso8859_13.h" + > + RelativePath=".\filters\unicode_table_iso8859_14.h" + > + RelativePath=".\filters\unicode_table_iso8859_15.h" + > + 
RelativePath=".\filters\unicode_table_iso8859_16.h" + > + RelativePath=".\filters\unicode_table_iso8859_2.h" + > + RelativePath=".\filters\unicode_table_iso8859_3.h" + > + RelativePath=".\filters\unicode_table_iso8859_4.h" + > + RelativePath=".\filters\unicode_table_iso8859_5.h" + > + RelativePath=".\filters\unicode_table_iso8859_6.h" + > + RelativePath=".\filters\unicode_table_iso8859_7.h" + > + RelativePath=".\filters\unicode_table_iso8859_8.h" + > + RelativePath=".\filters\unicode_table_iso8859_9.h" + > + RelativePath=".\filters\unicode_table_jis.h" + > + RelativePath=".\filters\unicode_table_koi8r.h" + > + RelativePath=".\filters\unicode_table_koi8u.h" + > + RelativePath=".\filters\unicode_table_uhc.h" + > + Filter="ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" + > + RelativePath=".\mbfl.rc" + > + + + + + + + + + + + + + + diff --git a/ext/mbstring/libmbfl/mbfl.rc b/ext/mbstring/libmbfl/mbfl.rc index 655e61893a..9d6a0c78e5 100644 --- a/ext/mbstring/libmbfl/mbfl.rc +++ b/ext/mbstring/libmbfl/mbfl.rc @@ -1,7 +1,7 @@ /* $Id$ */ 1 VERSIONINFO -FILEVERSION 1,1,0,0 -PRODUCTVERSION 1,1,0,0 +FILEVERSION 1,0,2,0 +PRODUCTVERSION 1,0,2,0 FILEFLAGSMASK 0 FILEOS 0x40000 FILETYPE 1 @@ -12,12 +12,12 @@ FILETYPE 1 { VALUE "CompanyName", "-\0" VALUE "FileDescription", "streamable kanji code filter\0" - VALUE "FileVersion", "1.1.0\0" + VALUE "FileVersion", "1.0.2\0" VALUE "InternalName", "mbfl\0" VALUE "LegalCopyright", "GNU Lesser Public License Version 2.0\0" VALUE "OriginalFilename", "mbfl.dll\0" VALUE "ProductName", "mbfl\0" - VALUE "ProductVersion", "1.1.0\0" + VALUE "ProductVersion", "1.0.2\0" } } } diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.am b/ext/mbstring/libmbfl/mbfl/Makefile.am index 25d6734541..6e662d14e5 100644 --- a/ext/mbstring/libmbfl/mbfl/Makefile.am +++ b/ext/mbstring/libmbfl/mbfl/Makefile.am @@ -1,12 +1,37 @@ EXTRA_DIST=Makefile.bcc32 mk_eaw_tbl.awk lib_LTLIBRARIES=libmbfl.la -libmbfl_la_SOURCES=mbfilter.c mbfl_string.c mbfl_language.c 
mbfl_encoding.c mbfl_convert.c mbfl_ident.c mbfl_memory_device.c mbfl_allocators.c mbfl_filter_output.c mbfilter_pass.c mbfilter_wchar.c mbfilter_8bit.c eaw_table.h +libmbfl_la_SOURCES=mbfilter.c \ + mbfl_string.c \ + mbfl_language.c \ + mbfl_encoding.c \ + mbfl_convert.c \ + mbfl_ident.c \ + mbfl_memory_device.c \ + mbfl_allocators.c \ + mbfl_filter_output.c \ + mbfilter_pass.c \ + mbfilter_wchar.c \ + mbfilter_8bit.c \ + eaw_table.h libmbfl_filters_la=../filters/libmbfl_filters.la libmbfl_nls_la=../nls/libmbfl_nls.la libmbfl_la_LIBADD=$(libmbfl_filters_la) $(libmbfl_nls_la) libmbfl_la_LDFLAGS=-version-info $(SHLIB_VERSION) libmbfl_includedir=$(includedir)/mbfl -libmbfl_include_HEADERS=mbfilter.h mbfl_consts.h mbfl_encoding.h mbfl_language.h mbfl_string.h mbfl_convert.h mbfl_ident.h mbfl_memory_device.h mbfl_allocators.h mbfl_defs.h mbfl_filter_output.h mbfilter_pass.h mbfilter_wchar.h mbfilter_8bit.h +libmbfl_include_HEADERS=mbfilter.h \ + mbfl_consts.h \ + mbfl_encoding.h \ + mbfl_language.h \ + mbfl_string.h \ + mbfl_convert.h \ + mbfl_ident.h \ + mbfl_memory_device.h \ + mbfl_allocators.h \ + mbfl_defs.h \ + mbfl_filter_output.h \ + mbfilter_pass.h \ + mbfilter_wchar.h \ + mbfilter_8bit.h mbfilter.c: eaw_table.h diff --git a/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 b/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 index 5d43e6a6f4..1b43a49efe 100644 --- a/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 +++ b/ext/mbstring/libmbfl/mbfl/Makefile.bcc32 @@ -1,5 +1,16 @@ !include ..\rules.mak.bcc32 -OBJS=mbfilter.obj mbfilter_8bit.obj mbfilter_pass.obj mbfilter_wchar.obj mbfl_allocators.obj mbfl_convert.obj mbfl_encoding.obj mbfl_filter_output.obj mbfl_ident.obj mbfl_language.obj mbfl_memory_device.obj mbfl_string.obj +OBJS=mbfilter.obj \ + mbfilter_8bit.obj \ + mbfilter_pass.obj \ + mbfilter_wchar.obj \ + mbfl_allocators.obj \ + mbfl_convert.obj \ + mbfl_encoding.obj \ + mbfl_filter_output.obj \ + mbfl_ident.obj \ + mbfl_language.obj \ + mbfl_memory_device.obj \ + 
mbfl_string.obj all: $(OBJS) diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.h b/ext/mbstring/libmbfl/mbfl/mbfilter.h index 73b7229c22..a00c51b5f4 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.h @@ -98,6 +98,13 @@ #include "mbfl_convert.h" #include "mbfl_ident.h" +/* + * version information + */ +#define MBFL_VERSION_MAJOR 1 +#define MBFL_VERSION_MINOR 0 +#define MBFL_VERSION_TEENY 2 + /* * convert filter */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h index a87c564616..4fc8922605 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_8bit.h @@ -34,6 +34,6 @@ #include "mbfl_defs.h" #include "mbfilter.h" -MBFLAPI extern const mbfl_encoding mbfl_encoding_8bit; +extern const mbfl_encoding mbfl_encoding_8bit; #endif /* MBFL_MBFILTER_8BIT_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h index 49d169c668..087aa2c3be 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_pass.h @@ -33,8 +33,8 @@ #include "mbfl_defs.h" #include "mbfilter.h" -MBFLAPI extern const mbfl_encoding mbfl_encoding_pass; -MBFLAPI extern const struct mbfl_convert_vtbl vtbl_pass; +extern const mbfl_encoding mbfl_encoding_pass; +extern const struct mbfl_convert_vtbl vtbl_pass; MBFLAPI extern int mbfl_filt_conv_pass(int c, mbfl_convert_filter *filter); diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h index 9e9396a77f..24bf7473c1 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h +++ b/ext/mbstring/libmbfl/mbfl/mbfilter_wchar.h @@ -34,6 +34,6 @@ #include "mbfl_defs.h" #include "mbfilter.h" -MBFLAPI extern const mbfl_encoding mbfl_encoding_wchar; +extern const mbfl_encoding mbfl_encoding_wchar; #endif /* MBFL_MBFILTER_WCHAR_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c 
b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index 17e00dd595..725a674b39 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -53,15 +53,18 @@ #include "filters/mbfilter_sjis.h" #include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -140,8 +143,12 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { &vtbl_wchar_cp866, &vtbl_koi8r_wchar, &vtbl_wchar_koi8r, + &vtbl_koi8u_wchar, + &vtbl_wchar_koi8u, &vtbl_cp1252_wchar, &vtbl_wchar_cp1252, + &vtbl_cp1254_wchar, + &vtbl_wchar_cp1254, &vtbl_ascii_wchar, &vtbl_wchar_ascii, &vtbl_8859_1_wchar, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 2e5b4abaf8..76956f0530 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -59,15 +59,18 @@ #include "filters/mbfilter_sjis.h" #include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include 
"filters/mbfilter_cp1254.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -156,6 +159,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_2022jp, &mbfl_encoding_2022jpms, &mbfl_encoding_cp1252, + &mbfl_encoding_cp1254, &mbfl_encoding_8859_1, &mbfl_encoding_8859_2, &mbfl_encoding_8859_3, @@ -181,6 +185,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_cp1251, &mbfl_encoding_cp866, &mbfl_encoding_koi8r, + &mbfl_encoding_koi8u, &mbfl_encoding_armscii8, &mbfl_encoding_cp850, NULL @@ -197,16 +202,16 @@ mbfl_name2encoding(const char *name) return NULL; } - i = 0; - while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){ + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL){ if (strcasecmp(encoding->name, name) == 0) { return encoding; } } - /* serch MIME charset name */ - i = 0; - while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { + /* search MIME charset name */ + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { if (encoding->mime_name != NULL) { if (strcasecmp(encoding->mime_name, name) == 0) { return encoding; @@ -214,12 +219,12 @@ mbfl_name2encoding(const char *name) } } - /* serch aliases */ - i = 0; - while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { + /* search aliases */ + i = 0; + while ((encoding = mbfl_encoding_ptr_list[i++]) != NULL) { if (encoding->aliases != NULL) { - j = 0; - while ((*encoding->aliases)[j] != NULL) { + j = 0; + while ((*encoding->aliases)[j] != NULL) { if (strcasecmp((*encoding->aliases)[j], name) == 0) { return encoding; } diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index 351a217170..2599e1107e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -75,6 +75,7 @@ enum mbfl_no_encoding { mbfl_no_encoding_2022jp, mbfl_no_encoding_2022jpms, 
mbfl_no_encoding_cp1252, + mbfl_no_encoding_cp1254, mbfl_no_encoding_8859_1, mbfl_no_encoding_8859_2, mbfl_no_encoding_8859_3, @@ -99,6 +100,7 @@ enum mbfl_no_encoding { mbfl_no_encoding_cp1251, mbfl_no_encoding_cp866, mbfl_no_encoding_koi8r, + mbfl_no_encoding_koi8u, mbfl_no_encoding_8859_16, mbfl_no_encoding_armscii8, mbfl_no_encoding_cp850, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c index ade0f2a1ab..4f3bd5c58d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -51,15 +51,19 @@ #include "filters/mbfilter_iso2022_kr.h" #include "filters/mbfilter_sjis.h" #include "filters/mbfilter_jis.h" +#include "filters/mbfilter_iso2022_jp_ms.h" #include "filters/mbfilter_euc_jp.h" #include "filters/mbfilter_euc_jp_win.h" #include "filters/mbfilter_ascii.h" #include "filters/mbfilter_koi8r.h" +#include "filters/mbfilter_koi8u.h" #include "filters/mbfilter_cp866.h" #include "filters/mbfilter_cp932.h" #include "filters/mbfilter_cp936.h" #include "filters/mbfilter_cp1251.h" #include "filters/mbfilter_cp1252.h" +#include "filters/mbfilter_cp1254.h" +#include "filters/mbfilter_cp51932.h" #include "filters/mbfilter_iso8859_1.h" #include "filters/mbfilter_iso8859_2.h" #include "filters/mbfilter_iso8859_3.h" @@ -108,6 +112,7 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_jis, &vtbl_identify_2022jp, &vtbl_identify_2022jpms, + &vtbl_identify_cp51932, &vtbl_identify_euccn, &vtbl_identify_cp936, &vtbl_identify_hz, @@ -119,7 +124,9 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_cp1251, &vtbl_identify_cp866, &vtbl_identify_koi8r, + &vtbl_identify_koi8u, &vtbl_identify_cp1252, + &vtbl_identify_cp1254, &vtbl_identify_8859_1, &vtbl_identify_8859_2, &vtbl_identify_8859_3, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.c b/ext/mbstring/libmbfl/mbfl/mbfl_language.c index aaeebbc8e8..4dd9726362 100644 --- 
a/ext/mbstring/libmbfl/mbfl/mbfl_language.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_language.c @@ -57,6 +57,7 @@ #include "nls/nls_uni.h" #include "nls/nls_de.h" #include "nls/nls_ru.h" +#include "nls/nls_ua.h" #include "nls/nls_en.h" #include "nls/nls_hy.h" #include "nls/nls_tr.h" @@ -77,6 +78,7 @@ static const mbfl_language *mbfl_language_ptr_table[] = { &mbfl_language_english, &mbfl_language_german, &mbfl_language_russian, + &mbfl_language_ukrainian, &mbfl_language_armenian, &mbfl_language_turkish, &mbfl_language_neutral, diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_language.h b/ext/mbstring/libmbfl/mbfl/mbfl_language.h index caf1d80940..af42a010cf 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_language.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_language.h @@ -57,6 +57,7 @@ enum mbfl_no_language { mbfl_no_language_simplified_chinese, /* zh-cn */ mbfl_no_language_traditional_chinese, /* zh-tw */ mbfl_no_language_russian, /* ru */ + mbfl_no_language_ukrainian, /* ua */ mbfl_no_language_armenian, /* hy */ mbfl_no_language_turkish, /* tr */ mbfl_no_language_max diff --git a/ext/mbstring/libmbfl/nls/Makefile.am b/ext/mbstring/libmbfl/nls/Makefile.am index ca81f902c8..454a07c638 100644 --- a/ext/mbstring/libmbfl/nls/Makefile.am +++ b/ext/mbstring/libmbfl/nls/Makefile.am @@ -2,4 +2,25 @@ EXTRA_DIST=Makefile.bcc32 noinst_LTLIBRARIES=libmbfl_nls.la INCLUDES=-I../mbfl libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION) -libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h +libmbfl_nls_la_SOURCES=nls_ja.c \ + nls_de.c \ + nls_en.c \ + nls_hy.c \ + nls_tr.c \ + nls_kr.c \ + nls_ru.c \ + nls_ua.c \ + nls_zh.c \ + nls_uni.c \ + nls_neutral.c \ + nls_ja.h \ + nls_de.h \ + nls_en.h \ + nls_hy.h \ + nls_tr.h \ + nls_kr.h \ + nls_ru.h \ + nls_ua.h \ + nls_zh.h \ + nls_uni.h \ + nls_neutral.h diff --git a/ext/mbstring/libmbfl/nls/Makefile.bcc32 
b/ext/mbstring/libmbfl/nls/Makefile.bcc32 index 444e88c52b..dea8689cb1 100644 --- a/ext/mbstring/libmbfl/nls/Makefile.bcc32 +++ b/ext/mbstring/libmbfl/nls/Makefile.bcc32 @@ -1,6 +1,16 @@ !include ..\rules.mak.bcc32 INCLUDES=$(INCLUDES) -I..\mbfl -OBJS=nls_ja.obj nls_de.obj nls_en.obj nls_hy.obj nls_kr.obj nls_ru.obj nls_zh.obj nls_uni.obj nls_neutral.obj +OBJS=nls_ja.obj \ + nls_de.obj \ + nls_en.obj \ + nls_hy.obj \ + nls_tr.obj \ + nls_kr.obj \ + nls_ru.obj \ + nls_ua.obj \ + nls_zh.obj \ + nls_uni.obj \ + nls_neutral.obj all: $(OBJS) diff --git a/ext/mbstring/libmbfl/nls/nls_ua.c b/ext/mbstring/libmbfl/nls/nls_ua.c new file mode 100644 index 0000000000..85fe9b49f9 --- /dev/null +++ b/ext/mbstring/libmbfl/nls/nls_ua.c @@ -0,0 +1,22 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#ifdef HAVE_STDDEF_H +#include +#endif + + +#include "mbfilter.h" +#include "nls_ua.h" + +const mbfl_language mbfl_language_ukrainian = { + mbfl_no_language_ukrainian, + "Ukrainian", + "ua", + NULL, + mbfl_no_encoding_koi8u, + mbfl_no_encoding_qprint, + mbfl_no_encoding_8bit +}; diff --git a/ext/mbstring/libmbfl/nls/nls_ua.h b/ext/mbstring/libmbfl/nls/nls_ua.h new file mode 100644 index 0000000000..0efa8d1f08 --- /dev/null +++ b/ext/mbstring/libmbfl/nls/nls_ua.h @@ -0,0 +1,9 @@ +#ifndef MBFL_NLS_UA_H +#define MBFL_NLS_UA_H + +#include "mbfilter.h" +#include "nls_ua.h" + +extern const mbfl_language mbfl_language_ukrainian; + +#endif /* MBFL_NLS_UA_H */ diff --git a/ext/mbstring/libmbfl/tests/Makefile.am b/ext/mbstring/libmbfl/tests/Makefile.am new file mode 100644 index 0000000000..8e857bc22e --- /dev/null +++ b/ext/mbstring/libmbfl/tests/Makefile.am @@ -0,0 +1,10 @@ +SUBDIRS=conv_encoding.tests conv_kana.tests strwidth.tests strcut.tests +noinst_PROGRAMS=conv_encoding conv_kana strwidth strcut +conv_encoding_SOURCES=conv_encoding.c +conv_encoding_LDADD=../mbfl/libmbfl.la +conv_kana_SOURCES=conv_kana.c +conv_kana_LDADD=../mbfl/libmbfl.la +strwidth_SOURCES=strwidth.c 
+strwidth_LDADD=../mbfl/libmbfl.la +strcut_SOURCES=strcut.c +strcut_LDADD=../mbfl/libmbfl.la diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.c b/ext/mbstring/libmbfl/tests/conv_encoding.c new file mode 100644 index 0000000000..9769964743 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.c @@ -0,0 +1,104 @@ +/** + * this is a small sample script to use libmbfl. + * Rui Hirokawa + * + * this file is encoded in EUC-JP. + */ + +#include +#include +#include +#include "mbfl/mbfilter.h" + +static void hexdump(const mbfl_string *ptr) +{ + unsigned int i; + + for (i = 0; i < ptr->len; i++) { + printf("%%%02x", ptr->val[i]); + } + + printf(" (%u)\n", ptr->len); +} + +int main(int argc, char **argv) +{ + enum mbfl_no_encoding from_encoding, to_encoding; + enum mbfl_no_language no_language; + mbfl_buffer_converter *convd = NULL; + mbfl_memory_device dev; + mbfl_string string, result, *ret; + int final = 0; + int state = 0; + + if (argc < 4) { + fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]); + return EXIT_FAILURE; + } + + if ((no_language = mbfl_name2no_language(argv[1])) == + mbfl_no_language_invalid) { + printf("Unsupported NLS: %s\n", argv[1]); + return EXIT_FAILURE; + } + + if ((to_encoding = mbfl_name2no_encoding(argv[2])) == + mbfl_no_encoding_invalid) { + printf("Unsupported encoding: %s\n", argv[2]); + return EXIT_FAILURE; + } + + if ((from_encoding = mbfl_name2no_encoding(argv[3])) == + mbfl_no_encoding_invalid) { + printf("Unsupported encoding: %s\n", argv[3]); + return EXIT_FAILURE; + } + + convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0); + + do { + mbfl_memory_device_init(&dev, 0, 4096); + mbfl_string_init_set(&string, no_language, from_encoding); + + for (;;) { + const int c = fgetc(stdin); + + if (c == EOF) { + final = 1; + break; + } else if (c == 10) { + if (state == 1) { + state = 0; + continue; + } + break; + } else if (c == 13) { + state = 1; + break; + } + + if (dev.pos >= dev.length) { + if 
(dev.length + dev.allocsz < dev.length) { + printf("Unable to allocate memory\n"); + return EXIT_FAILURE; + } + + mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz, + dev.allocsz); + } + + dev.buffer[dev.pos++] = (unsigned char)c; + } + + mbfl_memory_device_result(&dev, &string); + mbfl_string_init_set(&result, no_language, to_encoding); + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); + hexdump(&result); + mbfl_string_clear(&result); + mbfl_string_clear(&string); + } while (!final); + + mbfl_buffer_converter_delete(convd); + + return EXIT_SUCCESS; +} diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am new file mode 100644 index 0000000000..37713c3952 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST=*.exp diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp new file mode 100644 index 0000000000..0e63ef11b7 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/cp51932_cp50220raw.exp @@ -0,0 +1,33 @@ +#!/usr/bin/expect -f +spawn tests/conv_encoding Japanese CP50220raw eucJP-win +set timeout 1 + +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} + +set test "81 - 87ku" +send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r" +expect { + "%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test } +} + + +set test "kanji + kana" +send "ÆüËÜ¸ì¥Æ¥¹¥È\r" +expect { + "%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test } +} + +set test "full-width numerics" +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + "%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test } +} + +set test "full-width numerics" +send "­Î" +expect { + "%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test } +} + diff --git 
a/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp new file mode 100644 index 0000000000..882953f71b --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/ujis_sjis.exp @@ -0,0 +1,35 @@ +#!/usr/bin/expect -f +spawn tests/conv_encoding Japanese Shift_JIS EUC-JP +set timeout 1 + +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} + +set test "basic test" +send "testtest\r" +expect { + "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test } +} + + +set test "kanji + kana" +send "ÆüËÜ¸ì¥Æ¥¹¥È\r" +expect { + "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test } +} + +set test "full-width numerics" +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test } +} + +set test "full-width numerics" +send "­Î" +expect { + "%3f (1)\r\n" { pass $test } +} + +close +# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP diff --git a/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp b/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp new file mode 100644 index 0000000000..e51b5e4d6e --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_encoding.tests/utf8_sjis.exp @@ -0,0 +1,35 @@ +#!/usr/bin/expect -f +spawn tests/conv_encoding Japanese Shift_JIS UTF-8 +set timeout 1 + +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} + +set test "basic test" +send "testtest\r" +expect { + "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test } +} + + +set test "kanji + kana" +send "日本語テスト\r" +expect { + "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test } +} + +set test "full-width numerics" +send "0123456789\r" +expect { + "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test } +} + +set test "full-width numerics" +send "㍊" +expect { + "%3f (1)\r\n" { pass $test } +} + +close +# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP diff --git a/ext/mbstring/libmbfl/tests/conv_kana.c 
b/ext/mbstring/libmbfl/tests/conv_kana.c new file mode 100644 index 0000000000..c12fdc2f9b --- /dev/null +++ b/ext/mbstring/libmbfl/tests/conv_kana.c @@ -0,0 +1,147 @@ +/** + * this is a small sample script to use libmbfl. + * Rui Hirokawa + * + * this file is encoded in EUC-JP. + */ + +#include +#include +#include +#include "mbfl/mbfilter.h" + +static void hexdump(const mbfl_string *ptr) +{ + unsigned int i; + + for (i = 0; i < ptr->len; i++) { + printf("%%%02x", ptr->val[i]); + } + + printf(" (%u)\n", ptr->len); +} + +int main(int argc, char **argv) +{ + enum mbfl_no_encoding no_enc; + const enum mbfl_no_language no_lang = mbfl_no_language_japanese; + mbfl_memory_device dev; + mbfl_string string, result; + int final = 0; + int state = 0; + int mode = 0; + + if (argc < 3) { + fprintf(stderr, "Usage: %s encoding flags\n", argv[0]); + return EXIT_FAILURE; + } + + if ((no_enc = mbfl_name2no_encoding(argv[1])) == + mbfl_no_encoding_invalid) { + printf("Unsupported encoding: %s\n", argv[1]); + return EXIT_FAILURE; + } + + { + const char *p; + + for (p= argv[2] + strlen(argv[2]); p > argv[2]; ) { + switch (*(--p)) { + case 'A': + mode |= 0x1; + break; + case 'a': + mode |= 0x10; + break; + case 'R': + mode |= 0x2; + break; + case 'r': + mode |= 0x20; + break; + case 'N': + mode |= 0x4; + break; + case 'n': + mode |= 0x40; + break; + case 'S': + mode |= 0x8; + break; + case 's': + mode |= 0x80; + break; + case 'K': + mode |= 0x100; + break; + case 'k': + mode |= 0x1000; + break; + case 'H': + mode |= 0x200; + break; + case 'h': + mode |= 0x2000; + break; + case 'V': + mode |= 0x800; + break; + case 'C': + mode |= 0x10000; + break; + case 'c': + mode |= 0x20000; + break; + case 'M': + mode |= 0x100000; + break; + case 'm': + mode |= 0x200000; + break; + } + } + } + + do { + mbfl_memory_device_init(&dev, 0, 4096); + mbfl_string_init_set(&string, no_lang, no_enc); + + for (;;) { + const int c = fgetc(stdin); + + if (c == EOF) { + final = 1; + break; + } else if (c == 10) { 
+ if (state == 1) { + state = 0; + continue; + } + break; + } else if (c == 13) { + state = 1; + break; + } + + if (dev.pos >= dev.length) { + if (dev.length + dev.allocsz < dev.length) { + printf("Unable to allocate memory\n"); + return EXIT_FAILURE; + } + + mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz, + dev.allocsz); + } + + dev.buffer[dev.pos++] = (unsigned char)c; + } + + mbfl_memory_device_result(&dev, &string); + mbfl_ja_jp_hantozen(&string, &result, mode); + hexdump(&result); + mbfl_string_clear(&result); + mbfl_string_clear(&string); + } while (!final); + + return EXIT_SUCCESS; +} diff --git a/ext/mbstring/libmbfl/tests/strcut.c b/ext/mbstring/libmbfl/tests/strcut.c new file mode 100644 index 0000000000..2d6a873205 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strcut.c @@ -0,0 +1,113 @@ +/** + * this is a small sample script to use libmbfl. + * Rui Hirokawa + * + * this file is encoded in EUC-JP. + */ + +#include +#include +#include +#include +#include "mbfl/mbfilter.h" + +static void hexdump(const mbfl_string *ptr) +{ + unsigned int i; + + for (i = 0; i < ptr->len; i++) { + printf("%%%02x", ptr->val[i]); + } + + printf(" (%u)\n", ptr->len); +} + +int main(int argc, char **argv) +{ + enum mbfl_no_encoding no_encoding; + enum mbfl_no_language no_language; + mbfl_memory_device dev; + mbfl_string string; + int offset, length; + int final = 0; + int state = 0; + + if (argc < 5) { + fprintf(stderr, "Usage: %s lang encoding offset length\n", argv[0]); + return EXIT_FAILURE; + } + + if ((no_language = mbfl_name2no_language(argv[1])) == + mbfl_no_language_invalid) { + printf("Unsupported NLS: %s\n", argv[1]); + return EXIT_FAILURE; + } + + if ((no_encoding = mbfl_name2no_encoding(argv[2])) == + mbfl_no_encoding_invalid) { + printf("Unsupported encoding: %s\n", argv[2]); + return EXIT_FAILURE; + } + + errno = 0; + offset = strtol(argv[3], NULL, 10); + if (errno) { + printf("Invalid offset: %s\n", argv[3]); + return EXIT_FAILURE; + } + + length = 
strtol(argv[4], NULL, 10); + if (errno) { + printf("Invalid length: %s\n", argv[4]); + return EXIT_FAILURE; + } + + + do { + mbfl_string result; + + mbfl_memory_device_init(&dev, 0, 4096); + mbfl_string_init_set(&string, no_language, no_encoding); + + for (;;) { + const int c = fgetc(stdin); + + if (c == EOF) { + final = 1; + break; + } else if (c == 10) { + if (state == 1) { + state = 0; + continue; + } + break; + } else if (c == 13) { + state = 1; + break; + } + + if (dev.pos >= dev.length) { + if (dev.length + dev.allocsz < dev.length) { + printf("Unable to allocate memory\n"); + return EXIT_FAILURE; + } + + mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz, + dev.allocsz); + } + + dev.buffer[dev.pos++] = (unsigned char)c; + } + + mbfl_memory_device_result(&dev, &string); + if (mbfl_strcut(&string, &result, offset, length)) { + hexdump(&result); + mbfl_string_clear(&result); + } else { + printf("***ERROR***\n"); + } + mbfl_string_clear(&string); + } while (!final); + + return EXIT_SUCCESS; +} diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am new file mode 100644 index 0000000000..37713c3952 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strcut.tests/Makefile.am @@ -0,0 +1 @@ +EXTRA_DIST=*.exp diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp new file mode 100644 index 0000000000..f203bbf5f0 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strcut.tests/iso2022jp.exp @@ -0,0 +1,129 @@ +#!/usr/bin/expect -f +proc begin_strcut_test {_from _length} { + global spawn_id from length + set from $_from + set length $_length + + spawn tests/strcut Japanese "ISO-2022-JP" $_from $_length + set timeout 10 + + expect_after { + "\[^\r\n\]*\r\n" { fail $test } + } +} + +begin_strcut_test -1 2 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +set test 
"non-asciish characters ($from, $length)" +send "\x1b\$B%F%9%H%F%9%H\x1b(B\r" +sleep 1 +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +close +begin_strcut_test 2 -1 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "\x1b\$B%F%9%H%F%9%H\x1b(B\r" +sleep 1 +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +close +begin_strcut_test 3 2 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "%74%74 (2)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "\x1b\$B%F%9%H%F%9%H\x1b(B\r" +sleep 1 +expect { + -ex " (0)\r\n" { pass $test } +} + +close +begin_strcut_test 5 8 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "%65%73%74 (3)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +sleep 1 +send "\x1b\$B%F%9%H%F%9%H\x1b(B\r" +sleep 1 +expect { + -ex "%1b%24%42%25%39%1b%28%42 (8)\r\n" { pass $test } +} + +close +begin_strcut_test 1 15 + +set test "asciish characters ($from, $length)" +send "testestestestestes\r" +expect { + "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "\x1b\$B%F%9%H%F%9%H\x1b(B\r" +sleep 1 +expect { + -ex "%1b%24%42%25%46%25%39%25%48%25%46%1b%28%42 (14)\r\n" { pass $test } +} +close +begin_strcut_test 8 20 + +set test "non-asciish characters (2) ($from, $length)" +send 
"\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r" +sleep 1 +expect { + -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" { + pass $test + } +} + +begin_strcut_test 8 21 + +set test "non-asciish characters (2) ($from, $length)" +sleep 1 +send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r" +expect { + -ex "%74%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%24%46%1b%28%42 (20)\r\n" { + pass $test + } +} + +begin_strcut_test 11 17 + +set test "non-asciish characters (2) ($from, $length)" +sleep 1 +send "\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x24\x46\x24\x39\x24\x48\x1b\x28\x49\x4a\x5e\x4a\x5e\x4a\x5e\x43\x3d\x44\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\x1b\x24\x42\x25\x46\x1b\x28\x42\x74\x1b\x24\x42\x25\x39\x1b\x28\x42\x74\x1b\x24\x42\x25\x48\x1b\x28\x42\x74\r" +expect { + -ex 
"%1b%24%42%25%39%1b%28%42%74%1b%24%42%25%48%1b%28%42 (17)\r\n" { + pass $test + } +} + + +# vim: sts=4 sw=4 ts=4 et diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp b/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp new file mode 100644 index 0000000000..8ad6f95635 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strcut.tests/ujis.exp @@ -0,0 +1,91 @@ +#!/usr/bin/expect -f +proc begin_strcut_test {_from _length} { + global spawn_id from length + set from $_from + set length $_length + + spawn tests/strcut Japanese EUC-JP $_from $_length + set timeout 1 + + expect_after { + "\[^\r\n\]*\r\n" { fail $test } + } +} + +begin_strcut_test -1 2 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "¥Æ¥¹¥È¥Æ¥¹¥È\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +close +begin_strcut_test 2 -1 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "¥Æ¥¹¥È¥Æ¥¹¥È\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +close +begin_strcut_test 3 2 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "%74%74 (2)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "¥Æ¥¹¥È¥Æ¥¹¥È\r" +expect { + -ex "%a5%b9 (2)\r\n" { pass $test } +} + +close +begin_strcut_test 5 8 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "%65%73%74 (3)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "¥Æ¥¹¥È¥Æ¥¹¥È\r" +expect { + -ex "%a5%c8%a5%c6%a5%b9%a5%c8 (8)\r\n" { pass $test } +} + +close +begin_strcut_test 1 15 + +set test "asciish characters ($from, $length)" +send "testestestestestes\r" +expect { + "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test } +} + +set test "non-asciish 
characters ($from, $length)" +send "¥Æ¥¹¥È¥Æ¥¹¥È\r" +expect { + -ex "%a5%c6%a5%b9%a5%c8%a5%c6%a5%b9%a5%c8 (12)\r\n" { pass $test } +} +close + + +# vim: sts=4 sw=4 ts=4 et encoding=EUC-JP diff --git a/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp b/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp new file mode 100644 index 0000000000..5104bf1905 --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strcut.tests/utf8.exp @@ -0,0 +1,91 @@ +#!/usr/bin/expect -f +proc begin_strcut_test {_from _length} { + global spawn_id from length + set from $_from + set length $_length + + spawn tests/strcut Japanese UTF-8 $_from $_length + set timeout 1 + + expect_after { + "\[^\r\n\]*\r\n" { fail $test } + } +} + +begin_strcut_test -1 2 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "テストテスト\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +close +begin_strcut_test 2 -1 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "テストテスト\r" +expect { + -ex "***ERROR***\r\n" { pass $test } +} + +close +begin_strcut_test 3 2 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "%74%74 (2)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "テストテスト\r" +expect { + -ex "(0)\r\n" { pass $test } +} + +close +begin_strcut_test 5 8 + +set test "asciish characters ($from, $length)" +send "testtest\r" +expect { + -ex "%65%73%74 (3)\r\n" { pass $test } +} + +set test "non-asciish characters ($from, $length)" +send "テストテスト\r" +expect { + -ex "%e3%82%b9%e3%83%88 (6)\r\n" { pass $test } +} + +close +begin_strcut_test 1 15 + +set test "asciish characters ($from, $length)" +send "testestestestestes\r" +expect { + "%65%73%74%65%73%74%65%73%74%65%73%74%65%73%74 (15)\r\n" { pass $test } 
+}
+
+set test "non-asciish characters ($from, $length)"
+send "テストテスト\r"
+expect {
+    -ex "%e3%83%86%e3%82%b9%e3%83%88%e3%83%86%e3%82%b9 (15)\r\n" { pass $test }
+}
+close
+
+
+# vim: sts=4 sw=4 ts=4 et encoding=UTF-8
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c
new file mode 100644
index 0000000000..9769964743
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.c
@@ -0,0 +1,121 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+/* Print every byte of the string as "%xx", followed by its length in parentheses. */
+static void hexdump(const mbfl_string *ptr)
+{
+	unsigned int i;
+
+	for (i = 0; i < ptr->len; i++) {
+		printf("%%%02x", ptr->val[i]);
+	}
+
+	printf(" (%u)\n", ptr->len);
+}
+
+/*
+ * Usage: conv_encoding lang to_encoding from_encoding
+ *
+ * Reads stdin line by line (CR, LF or CRLF terminated), converts each line
+ * from from_encoding to to_encoding and prints the result with hexdump().
+ */
+int main(int argc, char **argv)
+{
+	enum mbfl_no_encoding from_encoding, to_encoding;
+	enum mbfl_no_language no_language;
+	mbfl_buffer_converter *convd = NULL;
+	mbfl_memory_device dev;
+	mbfl_string string, result;
+	int final = 0;	/* set once EOF has been read */
+	int state = 0;	/* 1 while the previous line was terminated by CR */
+
+	if (argc < 4) {
+		fprintf(stderr, "Usage: %s lang to_encoding from_encoding\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	if ((no_language = mbfl_name2no_language(argv[1])) ==
+			mbfl_no_language_invalid) {
+		printf("Unsupported NLS: %s\n", argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	if ((to_encoding = mbfl_name2no_encoding(argv[2])) ==
+			mbfl_no_encoding_invalid) {
+		printf("Unsupported encoding: %s\n", argv[2]);
+		return EXIT_FAILURE;
+	}
+
+	if ((from_encoding = mbfl_name2no_encoding(argv[3])) ==
+			mbfl_no_encoding_invalid) {
+		printf("Unsupported encoding: %s\n", argv[3]);
+		return EXIT_FAILURE;
+	}
+
+	convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
+	if (convd == NULL) {
+		fprintf(stderr, "Unable to create the converter\n");
+		return EXIT_FAILURE;
+	}
+
+	do {
+		mbfl_memory_device_init(&dev, 0, 4096);
+		mbfl_string_init_set(&string, no_language, from_encoding);
+
+		/* collect one input line into dev */
+		for (;;) {
+			const int c = fgetc(stdin);
+
+			if (c == EOF) {
+				final = 1;
+				break;
+			} else if (c == 10) {
+				if (state == 1) {
+					/* LF of a CRLF pair: the line already ended at the CR */
+					state = 0;
+					continue;
+				}
+				break;
+			} else if (c == 13) {
+				state = 1;
+				break;
+			}
+
+			/* any other byte means a later LF is a terminator of its own */
+			state = 0;
+
+			if (dev.pos >= dev.length) {
+				/* refuse to grow if the new length would wrap around */
+				if (dev.length + dev.allocsz < dev.length) {
+					printf("Unable to allocate memory\n");
+					return EXIT_FAILURE;
+				}
+
+				mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+						dev.allocsz);
+			}
+
+			dev.buffer[dev.pos++] = (unsigned char)c;
+		}
+
+		mbfl_memory_device_result(&dev, &string);
+		mbfl_string_init_set(&result, no_language, to_encoding);
+		(void)mbfl_buffer_converter_feed_result(convd, &string, &result);
+		hexdump(&result);
+		mbfl_string_clear(&result);
+		mbfl_string_clear(&string);
+	} while (!final);
+
+	mbfl_buffer_converter_delete(convd);
+
+	return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp
new file mode 100644
index 0000000000..0e63ef11b7
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/cp51932_cp50220raw.exp
@@ -0,0 +1,33 @@
+#!/usr/bin/expect -f
+spawn tests/conv_encoding Japanese CP50220raw eucJP-win
+set timeout 1
+
+expect_after {
+    "\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "81 - 87ku"
+send "\xf5\xba\xf6\xec\xf7\xc9\xf8\xb3\xf9\xa1\xfa\xa1\xfb\xa1\r"
+expect {
+
"%1b%24%42%75%3a%76%6c%77%49%78%33%79%21%7a%21%7b%21%1b%28%42 (20)\r\n" { pass $test } +} + + +set test "kanji + kana" +send "ÆüËÜ¸ì¥Æ¥¹¥È\r" +expect { + "%1b%24%42%46%7c%4b%5c%38%6c%25%46%25%39%25%48%1b%28%42 (18)\r\n" { pass $test } +} + +set test "full-width numerics" +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + "%1b%24%42%23%30%23%31%23%32%23%33%23%34%23%35%23%36%23%37%23%38%23%39%1b%28%42 (26)\r\n" { pass $test } +} + +set test "full-width numerics" +send "­Î" +expect { + "%1b%24%42%2d%42%1b%28%42 (8)\r\n" { pass $test } +} + diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp new file mode 100644 index 0000000000..882953f71b --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/ujis_sjis.exp @@ -0,0 +1,35 @@ +#!/usr/bin/expect -f +spawn tests/conv_encoding Japanese Shift_JIS EUC-JP +set timeout 1 + +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} + +set test "basic test" +send "testtest\r" +expect { + "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test } +} + + +set test "kanji + kana" +send "ÆüËÜ¸ì¥Æ¥¹¥È\r" +expect { + "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test } +} + +set test "full-width numerics" +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test } +} + +set test "full-width numerics" +send "­Î" +expect { + "%3f (1)\r\n" { pass $test } +} + +close +# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp new file mode 100644 index 0000000000..e51b5e4d6e --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_encoding.tests/utf8_sjis.exp @@ -0,0 +1,35 @@ +#!/usr/bin/expect -f +spawn tests/conv_encoding Japanese Shift_JIS UTF-8 +set timeout 1 + +expect_after { + 
"\[^\r\n\]*\r\n" { fail $test }
+}
+
+set test "basic test"
+send "testtest\r"
+expect {
+    "%74%65%73%74%74%65%73%74 (8)\r\n" { pass $test }
+}
+
+
+set test "kanji + kana"
+send "日本語テスト\r"
+expect {
+    "%93%fa%96%7b%8c%ea%83%65%83%58%83%67 (12)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "0123456789\r"
+expect {
+    "%82%4f%82%50%82%51%82%52%82%53%82%54%82%55%82%56%82%57%82%58 (20)\r\n" { pass $test }
+}
+
+set test "full-width numerics"
+send "㍊"
+expect {
+    "%3f (1)\r\n" { pass $test }
+}
+
+close
+# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c
new file mode 100644
index 0000000000..c12fdc2f9b
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.c
@@ -0,0 +1,167 @@
+/**
+ * this is a small sample script to use libmbfl.
+ * Rui Hirokawa
+ *
+ * this file is encoded in EUC-JP.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "mbfl/mbfilter.h"
+
+/* Print every byte of the string as "%xx", followed by its length in parentheses. */
+static void hexdump(const mbfl_string *ptr)
+{
+	unsigned int i;
+
+	for (i = 0; i < ptr->len; i++) {
+		printf("%%%02x", ptr->val[i]);
+	}
+
+	printf(" (%u)\n", ptr->len);
+}
+
+/*
+ * Usage: conv_kana encoding flags
+ *
+ * Each letter of flags selects one bit of the mode passed to
+ * mbfl_ja_jp_hantozen(). stdin is read line by line (CR, LF or CRLF
+ * terminated); every line is converted and printed with hexdump().
+ */
+int main(int argc, char **argv)
+{
+	enum mbfl_no_encoding no_enc;
+	const enum mbfl_no_language no_lang = mbfl_no_language_japanese;
+	mbfl_memory_device dev;
+	mbfl_string string, result;
+	int final = 0;	/* set once EOF has been read */
+	int state = 0;	/* 1 while the previous line was terminated by CR */
+	int mode = 0;	/* bit mask built from the flags argument */
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s encoding flags\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	if ((no_enc = mbfl_name2no_encoding(argv[1])) ==
+			mbfl_no_encoding_invalid) {
+		printf("Unsupported encoding: %s\n", argv[1]);
+		return EXIT_FAILURE;
+	}
+
+	{
+		const char *p;
+
+		/* walk the flag string from its last character back to the first */
+		for (p = argv[2] + strlen(argv[2]); p > argv[2]; ) {
+			switch (*(--p)) {
+			case 'A':
+				mode |= 0x1;
+				break;
+			case 'a':
+				mode |= 0x10;
+				break;
+			case 'R':
+				mode |= 0x2;
+				break;
+			case 'r':
+				mode |= 0x20;
+				break;
+			case 'N':
+				mode |= 0x4;
+				break;
+			case 'n':
+				mode |= 0x40;
+				break;
+			case 'S':
+				mode |= 0x8;
+				break;
+			case 's':
+				mode |= 0x80;
+				break;
+			case 'K':
+				mode |= 0x100;
+				break;
+			case 'k':
+				mode |= 0x1000;
+				break;
+			case 'H':
+				mode |= 0x200;
+				break;
+			case 'h':
+				mode |= 0x2000;
+				break;
+			case 'V':
+				mode |= 0x800;
+				break;
+			case 'C':
+				mode |= 0x10000;
+				break;
+			case 'c':
+				mode |= 0x20000;
+				break;
+			case 'M':
+				mode |= 0x100000;
+				break;
+			case 'm':
+				mode |= 0x200000;
+				break;
+			default:
+				/* unknown flag letters are ignored */
+				break;
+			}
+		}
+	}
+
+	do {
+		mbfl_memory_device_init(&dev, 0, 4096);
+		mbfl_string_init_set(&string, no_lang, no_enc);
+
+		/* collect one input line into dev */
+		for (;;) {
+			const int c = fgetc(stdin);
+
+			if (c == EOF) {
+				final = 1;
+				break;
+			} else if (c == 10) {
+				if (state == 1) {
+					/* LF of a CRLF pair: the line already ended at the CR */
+					state = 0;
+					continue;
+				}
+				break;
+			} else if (c == 13) {
+				state = 1;
+				break;
+			}
+
+			/* any other byte means a later LF is a terminator of its own */
+			state = 0;
+
+			if (dev.pos >= dev.length) {
+				/* refuse to grow if the new length would wrap around */
+				if (dev.length + dev.allocsz < dev.length) {
+					printf("Unable to allocate memory\n");
+					return EXIT_FAILURE;
+				}
+
+				mbfl_memory_device_realloc(&dev, dev.length + dev.allocsz,
+						dev.allocsz);
+			}
+
+			dev.buffer[dev.pos++] = (unsigned char)c;
+		}
+
+		mbfl_memory_device_result(&dev, &string);
+		/* keep result well-defined in case the conversion returns without filling it in */
+		mbfl_string_init_set(&result, no_lang, no_enc);
+		mbfl_ja_jp_hantozen(&string, &result, mode);
+		hexdump(&result);
+		mbfl_string_clear(&result);
+		mbfl_string_clear(&string);
+	} while (!final);
+
+	return EXIT_SUCCESS;
+}
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am
new file mode 100644
index 0000000000..37713c3952
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST=*.exp
diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp
new file mode 100644
index 0000000000..a6459fe5ac
--- /dev/null
+++ b/ext/mbstring/libmbfl/tests/strwidth.tests/conv_kana.tests/conv_kana.exp
@@ -0,0 +1,1098 @@
+#!/usr/bin/expect -f
+
+set timeout 1
+
+set test "full-width alphabets to
half-width counterparts" +spawn tests/conv_kana EUC-JP "r" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac 
(70)\r\n" { pass $test } +} +close + +set test "half-width alphabets to full-width counterparts" +spawn tests/conv_kana EUC-JP "R" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "transliterate half-width alphabets to 
full-width counterparts and full-width to half-width at a time" +spawn tests/conv_kana EUC-JP "Rr" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex 
"%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "full-width numerics to half-width counterparts" +spawn tests/conv_kana EUC-JP "n" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex 
"%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "full-width numerics to half-width counterparts" +spawn tests/conv_kana EUC-JP "n" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex 
"%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "half-width numerics to full-width counterparts" +spawn tests/conv_kana EUC-JP "N" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + 
-ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "transliterate half-width numerics to full-width counterparts and full-width to half-width at a time" +spawn tests/conv_kana EUC-JP "nN" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send 
"ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "full-width alphanumerics to half-width counterparts" +spawn tests/conv_kana EUC-JP "a" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex 
"%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "half-width alphanumerics to full-width counterparts" +spawn tests/conv_kana EUC-JP "A" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex 
"%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "transliterate half-width alphanumerics to full-width counterparts and full-width to half-width at a time" +spawn tests/conv_kana EUC-JP "Aa" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + 
-ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%a3%b0%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "full-width spaces to half-width counterparts" +spawn tests/conv_kana EUC-JP "s" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex 
"%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%20%20 (2)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + 
+set test "half-width spaces to full-width counterparts" +spawn tests/conv_kana EUC-JP "S" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%a1%a1 (4)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "transliterate half-width spaces to full-width counterparts and full-width to 
half-width at a time" +spawn tests/conv_kana EUC-JP "Ss" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%20%a1%a1 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex 
"%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "full-width katakanas to half-width counterparts" +spawn tests/conv_kana EUC-JP "k" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" 
+expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%8e%de%a4%ad%8e%de%a4%af%8e%de%a4%b1%8e%de%a4%b3%8e%de%a4%cf%8e%df%a4%d2%8e%df%a4%d5%8e%df%a4%d8%8e%df%a4%db%8e%df (70)\r\n" { pass $test } +} +close + +set test "half-width katakanas to full-width counterparts" +spawn tests/conv_kana EUC-JP "K" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex 
"%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "transliterate half-width katakanas to full-width counterparts and full-width to half-width at a time" +spawn tests/conv_kana EUC-JP "kK" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send 
"£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%8e%de%a4%ad%8e%de%a4%af%8e%de%a4%b1%8e%de%a4%b3%8e%de%a4%cf%8e%df%a4%d2%8e%df%a4%d5%8e%df%a4%d8%8e%df%a4%db%8e%df (70)\r\n" { pass $test } +} +close + +set test "full-width hiraganas to half-width katakana" +spawn tests/conv_kana EUC-JP "h" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a 
(26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%8e%de%a5%ad%8e%de%a5%af%8e%de%a5%b1%8e%de%a5%b3%8e%de%a5%cf%8e%df%a5%d2%8e%df%a5%d5%8e%df%a5%d8%8e%df%a5%db%8e%df (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test } +} +close + +set test "half-width katakanas to full-width hiragana" +spawn tests/conv_kana EUC-JP "H" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex 
"%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "transliterate half-width katakanas to full-width hiraganas and full-width to half-width at a time" +spawn tests/conv_kana EUC-JP "hH" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" 
+expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%8e%de%a5%ad%8e%de%a5%af%8e%de%a5%b1%8e%de%a5%b3%8e%de%a5%cf%8e%df%a5%d2%8e%df%a5%d5%8e%df%a5%d8%8e%df%a5%db%8e%df (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex 
"%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (90)\r\n" { pass $test } +} +close + +set test "full-width katakanas to full-width hiragana" +spawn tests/conv_kana EUC-JP "c" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { 
+ -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "full-width hiraganas to full-width katakana" +spawn tests/conv_kana EUC-JP "C" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex 
"%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "conversion between full-width hiraganas and full-width katakanas, flags for both directions are specified at a time" +spawn tests/conv_kana EUC-JP "Cc" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex 
"%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%8e%b1%8e%b2%8e%b3%8e%b4%8e%b5%8e%b6%8e%de%8e%b7%8e%de%8e%b8%8e%de%8e%b9%8e%de%8e%ba%8e%de%8e%ca%8e%df%8e%cb%8e%df%8e%cc%8e%df%8e%cd%8e%df%8e%ce%8e%df (50)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "half-width katakanas to full-width katakanas, with voiced marks combined" +spawn tests/conv_kana EUC-JP "KV" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex "%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + 
-ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd (30)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + +set test "half-width katakanas to full-width hiraganas, with voiced marks combined" +spawn tests/conv_kana EUC-JP "HV" +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} +send "£á£â£ã£ä£å£æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú\r" +expect { + -ex "%a3%e1%a3%e2%a3%e3%a3%e4%a3%e5%a3%e6%a3%e7%a3%e8%a3%e9%a3%ea%a3%eb%a3%ec%a3%ed%a3%ee%a3%ef%a3%f0%a3%f1%a3%f2%a3%f3%a3%f4%a3%f5%a3%f6%a3%f7%a3%f8%a3%f9%a3%fa (52)\r\n" { pass $test } +} +send "£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú\r" +expect { + -ex "%a3%c1%a3%c2%a3%c3%a3%c4%a3%c5%a3%c6%a3%c7%a3%c8%a3%c9%a3%ca%a3%cb%a3%cc%a3%cd%a3%ce%a3%cf%a3%d0%a3%d1%a3%d2%a3%d3%a3%d4%a3%d5%a3%d6%a3%d7%a3%d8%a3%d9%a3%da (52)\r\n" { pass $test } +} +send "abcdefghijklmnopqrstuvwxyz\r" +expect { + -ex "%61%62%63%64%65%66%67%68%69%6a%6b%6c%6d%6e%6f%70%71%72%73%74%75%76%77%78%79%7a (26)\r\n" { pass $test } +} +send "ABCDEFGHIJKLMNOPQRSTUVWXYZ\r" +expect { + -ex 
"%41%42%43%44%45%46%47%48%49%4a%4b%4c%4d%4e%4f%50%51%52%53%54%55%56%57%58%59%5a (26)\r\n" { pass $test } +} +send "0123456789\r" +expect { + -ex "%30%31%32%33%34%35%36%37%38%39 (10)\r\n" { pass $test } +} +send "£°£±£²£³£´£µ£¶£·£¸£¹\r" +expect { + -ex "%a3%b1%a3%b2%a3%b3%a3%b4%a3%b5%a3%b6%a3%b7%a3%b8%a3%b9 (20)\r\n" { pass $test } +} +send "¡¡ \r" +expect { + -ex "%a1%a1%20 (3)\r\n" { pass $test } +} +send "¥¢¥¤¥¦¥¨¥ª¥¬¥®¥°¥²¥´¥Ñ¥Ô¥×¥Ú¥Ý¥«¡«¥­¡«¥¯¡«¥±¡«¥³¡«¥Ï¡¬¥Ò¡¬¥Õ¡¬¥Ø¡¬¥Û¡¬\r" +expect { + -ex "%a5%a2%a5%a4%a5%a6%a5%a8%a5%aa%a5%ac%a5%ae%a5%b0%a5%b2%a5%b4%a5%d1%a5%d4%a5%d7%a5%da%a5%dd%a5%ab%a1%ab%a5%ad%a1%ab%a5%af%a1%ab%a5%b1%a1%ab%a5%b3%a1%ab%a5%cf%a1%ac%a5%d2%a1%ac%a5%d5%a1%ac%a5%d8%a1%ac%a5%db%a1%ac (70)\r\n" { pass $test } +} +send "ޱ޲޳޴޵޶ŽÞŽ·ŽÞޏŽÞ޹ŽÞŽºŽÞŽÊŽßŽËŽßŽÌŽßŽÍŽßŽÎŽß\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd (30)\r\n" { pass $test } +} +send "¤¢¤¤¤¦¤¨¤ª¤¬¤®¤°¤²¤´¤Ñ¤Ô¤×¤Ú¤Ý¤«¡«¤­¡«¤¯¡«¤±¡«¤³¡«¤Ï¡¬¤Ò¡¬¤Õ¡¬¤Ø¡¬¤Û¡¬\r" +expect { + -ex "%a4%a2%a4%a4%a4%a6%a4%a8%a4%aa%a4%ac%a4%ae%a4%b0%a4%b2%a4%b4%a4%d1%a4%d4%a4%d7%a4%da%a4%dd%a4%ab%a1%ab%a4%ad%a1%ab%a4%af%a1%ab%a4%b1%a1%ab%a4%b3%a1%ab%a4%cf%a1%ac%a4%d2%a1%ac%a4%d5%a1%ac%a4%d8%a1%ac%a4%db%a1%ac (70)\r\n" { pass $test } +} +close + + +# vim: sts=4 ts=4 sw=4 et encoding=EUC-JP diff --git a/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp b/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp new file mode 100644 index 0000000000..09f518c07b --- /dev/null +++ b/ext/mbstring/libmbfl/tests/strwidth.tests/strwidth.exp @@ -0,0 +1,47 @@ +#!/usr/bin/expect -f +spawn tests/strwidth Japanese UTF-8 +set timeout 1 + +expect_after { + "\[^\r\n\]*\r\n" { fail $test } +} + +set test "basic test" +send "testtest\r" +expect { + "8\r\n" { pass $test } +} + +set test "CJK kanji" +send "漢字\r" +expect { + "4\r\n" { pass $test } +} + +set test "CJK hiragana" +send "ひらがな\r" +expect { + "8\r\n" { pass $test } +} + +set test "CJK katakana" +send "カタカナ\r" 
+expect { + "8\r\n" { pass $test } +} + +set test "Fullwidth symbols (1)" +send "〜!”#$%&’())\r" +expect { + "20\r\n" { pass $test } +} + +set test "Halfwidth symbols assumed to be fullwidth in JISX0208 (2)" +send "○●◎\r" +expect { + "3\r\n" { pass $test } +} + +close + +# vim: sts=4 sw=4 ts=4 et encoding=UTF-8 diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e7e5c2abe5..c73b9d8f93 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -151,9 +151,16 @@ static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = { static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8, + mbfl_no_encoding_cp1254, mbfl_no_encoding_8859_9 }; +static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = { + mbfl_no_encoding_ascii, + mbfl_no_encoding_utf8, + mbfl_no_encoding_koi8u +}; + static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = { mbfl_no_encoding_ascii, mbfl_no_encoding_utf8 @@ -168,6 +175,7 @@ static const php_mb_nls_ident_list php_mb_default_identify_list[] = { { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) }, { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) }, { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) }, + { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) }, { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) } }; @@ -360,7 +368,7 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1) ZEND_ARG_INFO(0, strict) ZEND_END_ARG_INFO() 
-ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_list_encodings, 0, 0, 0) +ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0) ZEND_END_ARG_INFO() ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1) @@ -1035,75 +1043,72 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output) int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC) { enum mbfl_no_encoding no_encoding; - const char *enc_name = NULL; - uint enc_name_len = 0; - - no_encoding = new_value ? mbfl_name2no_encoding(new_value): - mbfl_no_encoding_invalid; + const char *enc_name = NULL; + uint enc_name_len = 0; + + no_encoding = new_value ? mbfl_name2no_encoding(new_value): + mbfl_no_encoding_invalid; if (no_encoding != mbfl_no_encoding_invalid) { - enc_name = new_value; - enc_name_len = new_value_length; - } else { - switch (MBSTRG(language)) { - case mbfl_no_language_uni: - enc_name = "UTF-8"; - enc_name_len = sizeof("UTF-8") - 1; - break; - case mbfl_no_language_japanese: - enc_name = "EUC-JP"; - enc_name_len = sizeof("EUC-JP") - 1; - break; - case mbfl_no_language_korean: - enc_name = "EUC-KR"; - enc_name_len = sizeof("EUC-KR") - 1; - break; - case mbfl_no_language_simplified_chinese: - enc_name = "EUC-CN"; - enc_name_len = sizeof("EUC-CN") - 1; - break; - case mbfl_no_language_traditional_chinese: - enc_name = "EUC-TW"; - enc_name_len = sizeof("EUC-TW") - 1; - break; - case mbfl_no_language_russian: - enc_name = "KOI8-R"; - enc_name_len = sizeof("KOI8-R") - 1; - break; - case mbfl_no_language_german: - enc_name = "ISO-8859-15"; - enc_name_len = sizeof("ISO-8859-15") - 1; - break; - case mbfl_no_language_armenian: - enc_name = "ArmSCII-8"; - enc_name_len = sizeof("ArmSCII-8") - 1; - break; - case mbfl_no_language_turkish: - enc_name = "ISO-8859-9"; - enc_name_len = sizeof("ISO-8859-9") - 1; - break; - default: - enc_name = "ISO-8859-1"; - enc_name_len = sizeof("ISO-8859-1") - 1; - break; - } - no_encoding = mbfl_name2no_encoding(enc_name); - } - MBSTRG(internal_encoding) 
= no_encoding; - MBSTRG(current_internal_encoding) = no_encoding; + enc_name = new_value; + enc_name_len = new_value_length; + } else { + switch (MBSTRG(language)) { + case mbfl_no_language_uni: + enc_name = "UTF-8"; + enc_name_len = sizeof("UTF-8") - 1; + break; + case mbfl_no_language_japanese: + enc_name = "EUC-JP"; + enc_name_len = sizeof("EUC-JP") - 1; + break; + case mbfl_no_language_korean: + enc_name = "EUC-KR"; + enc_name_len = sizeof("EUC-KR") - 1; + break; + case mbfl_no_language_simplified_chinese: + enc_name = "EUC-CN"; + enc_name_len = sizeof("EUC-CN") - 1; + break; + case mbfl_no_language_traditional_chinese: + enc_name = "EUC-TW"; + enc_name_len = sizeof("EUC-TW") - 1; + break; + case mbfl_no_language_russian: + enc_name = "KOI8-R"; + enc_name_len = sizeof("KOI8-R") - 1; + break; + case mbfl_no_language_german: + enc_name = "ISO-8859-15"; + enc_name_len = sizeof("ISO-8859-15") - 1; + break; + case mbfl_no_language_armenian: + enc_name = "ArmSCII-8"; + enc_name_len = sizeof("ArmSCII-8") - 1; + break; + case mbfl_no_language_turkish: + enc_name = "ISO-8859-9"; + enc_name_len = sizeof("ISO-8859-9") - 1; + break; + default: + enc_name = "ISO-8859-1"; + enc_name_len = sizeof("ISO-8859-1") - 1; + break; + } + no_encoding = mbfl_name2no_encoding(enc_name); + } + MBSTRG(internal_encoding) = no_encoding; + MBSTRG(current_internal_encoding) = no_encoding; #if HAVE_MBREGEX { - const char *_enc_name = enc_name; - if (FAILURE == php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC)) { + const char *enc_name = new_value; + if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) { /* falls back to EUC-JP if an unknown encoding name is given */ - _enc_name = "EUC-JP"; - php_mb_regex_set_default_mbctype(_enc_name TSRMLS_CC); + enc_name = "EUC-JP"; + php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC); } - php_mb_regex_set_mbctype(_enc_name TSRMLS_CC); + php_mb_regex_set_mbctype(new_value TSRMLS_CC); } #endif -#ifdef ZEND_MULTIBYTE - 
zend_multibyte_set_internal_encoding(new_value, new_value_length TSRMLS_CC); -#endif /* ZEND_MULTIBYTE */ return SUCCESS; } /* }}} */ @@ -1253,28 +1258,22 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding) #endif /* ZEND_MULTIBYTE */ PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character) - STD_PHP_INI_ENTRY("mbstring.func_overload", "0", - PHP_INI_SYSTEM | PHP_INI_PERDIR, - OnUpdateLong, - func_overload, - zend_mbstring_globals, mbstring_globals) - + STD_PHP_INI_ENTRY("mbstring.func_overload", "0", + PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals) + STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0", PHP_INI_SYSTEM | PHP_INI_PERDIR, OnUpdate_mbstring_encoding_translation, - encoding_translation, - zend_mbstring_globals, mbstring_globals) - + encoding_translation, zend_mbstring_globals, mbstring_globals) PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes", - "^(text/|application/xhtml\\+xml)", - PHP_INI_ALL, - OnUpdate_mbstring_http_output_conv_mimetypes) + "^(text/|application/xhtml\\+xml)", + PHP_INI_ALL, + OnUpdate_mbstring_http_output_conv_mimetypes) STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0", PHP_INI_ALL, OnUpdateLong, - strict_detection, - zend_mbstring_globals, mbstring_globals) + strict_detection, zend_mbstring_globals, mbstring_globals) PHP_INI_END() /* }}} */ @@ -1333,11 +1332,9 @@ static PHP_GSHUTDOWN_FUNCTION(mbstring) if (mbstring_globals->detect_order_list) { free(mbstring_globals->detect_order_list); } - if (mbstring_globals->http_output_conv_mimetypes) { _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes); } - #if HAVE_MBREGEX php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC); #endif @@ -1450,6 +1447,7 @@ PHP_RINIT_FUNCTION(mbstring) PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU); #endif #ifdef ZEND_MULTIBYTE + 
zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC); php_mb_set_zend_encoding(TSRMLS_C); #endif /* ZEND_MULTIBYTE */ @@ -1578,7 +1576,7 @@ PHP_FUNCTION(mb_internal_encoding) #ifdef ZEND_MULTIBYTE /* TODO: make independent from mbstring.encoding_translation? */ if (MBSTRG(encoding_translation)) { - zend_multibyte_set_internal_encoding(name, name_len TSRMLS_CC); + zend_multibyte_set_internal_encoding(name TSRMLS_CC); } #endif /* ZEND_MULTIBYTE */ RETURN_TRUE; @@ -3071,18 +3069,13 @@ PHP_FUNCTION(mb_detect_encoding) /* }}} */ /* {{{ proto mixed mb_list_encodings() - Returns an array of all supported entity encodings or Returns the entity encoding as a string */ + Returns an array of all supported entity encodings */ PHP_FUNCTION(mb_list_encodings) { const mbfl_encoding **encodings; const mbfl_encoding *encoding; int i; - if (ZEND_NUM_ARGS() != 0) { - RETVAL_FALSE; - ZEND_WRONG_PARAM_COUNT(); - } - array_init(return_value); i = 0; encodings = mbfl_get_supported_encodings(); @@ -3319,8 +3312,8 @@ PHP_FUNCTION(mb_convert_variables) int n, to_enc_len, argc, stack_level, stack_max, elistsz; enum mbfl_no_encoding *elist; char *name, *to_enc; - void *ptmp; - + void *ptmp; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) { return; } @@ -3485,7 +3478,7 @@ detect_end: ret = mbfl_buffer_converter_feed_result(convd, &string, &result); if (ret != NULL) { if (Z_REFCOUNT_PP(hash_entry) > 1) { - Z_DELREF_P(*hash_entry); + Z_DELREF_PP(hash_entry); MAKE_STD_ZVAL(*hash_entry); } else { zval_dtor(*hash_entry); @@ -3864,7 +3857,7 @@ PHP_FUNCTION(mb_send_mail) smart_str *s; extern void mbfl_memory_device_unput(mbfl_memory_device *device); char *pp, *ee; - + /* initialize */ mbfl_memory_device_init(&device, 0, 0); mbfl_string_init(&orig_str); @@ -4501,8 +4494,7 @@ MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC) /* }}} */ /* {{{ MBSTRING_API int 
php_mb_gpc_encoding_converter() */ -MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from - TSRMLS_DC) +MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC) { int i; mbfl_string string, result, *ret = NULL; @@ -4722,8 +4714,9 @@ MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int /* }}} */ #ifdef ZEND_MULTIBYTE -/* {{{ MBSTRING_API int php_mb_set_zend_encoding() */ -MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D) + +/* {{{ php_mb_set_zend_encoding() */ +static int php_mb_set_zend_encoding(TSRMLS_D) { /* 'd better use mbfl_memory_device? */ char *name, *list = NULL; @@ -4763,7 +4756,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D) if (MBSTRG(encoding_translation)) { /* notify internal encoding to Zend Engine */ name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding)); - zend_multibyte_set_internal_encoding(name, strlen(name) TSRMLS_CC); + zend_multibyte_set_internal_encoding(name TSRMLS_CC); } zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC); @@ -4775,7 +4768,7 @@ MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D) /* {{{ char *php_mb_encoding_detector() * Interface for Zend Engine */ -char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg_list TSRMLS_DC) +static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC) { mbfl_string string; const char *ret; @@ -4798,7 +4791,7 @@ char* php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg mbfl_string_init(&string); string.no_language = MBSTRG(language); - string.val = (char*)arg_string; + string.val = (unsigned char *)arg_string; string.len = arg_length; ret = mbfl_identify_encoding_name(&string, elist, size, 0); if (list != NULL) { @@ -4813,9 +4806,9 @@ char* 
php_mb_encoding_detector(const char *arg_string, int arg_length, char *arg /* }}} */ /* {{{ int php_mb_encoding_converter() */ -int php_mb_encoding_converter(char **to, int *to_length, const char *from, - int from_length, const char *encoding_to, const char *encoding_from - TSRMLS_DC) +static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, + const unsigned char *from, size_t from_length, + const char *encoding_to, const char *encoding_from TSRMLS_DC) { mbfl_string string, result, *ret; enum mbfl_no_encoding from_encoding, to_encoding; @@ -4836,7 +4829,7 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from, mbfl_string_init(&result); string.no_encoding = from_encoding; string.no_language = MBSTRG(language); - string.val = (char*)from; + string.val = (unsigned char*)from; string.len = from_length; /* initialize converter */ @@ -4865,14 +4858,14 @@ int php_mb_encoding_converter(char **to, int *to_length, const char *from, * returns number of odd (e.g. 
appears only first byte of multibyte * character) chars */ -int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC) +static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC) { mbfl_string mb_string; mbfl_string_init(&mb_string); mb_string.no_language = MBSTRG(language); mb_string.no_encoding = mbfl_name2no_encoding(encoding); - mb_string.val = (char*)string; + mb_string.val = (unsigned char *)string; mb_string.len = length; if (mb_string.no_encoding == mbfl_no_encoding_invalid) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index cd6cc63b3f..c536183538 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -217,16 +217,6 @@ struct mb_overload_def { #define MBSTRG(v) (mbstring_globals.v) #endif -#ifdef ZEND_MULTIBYTE -MBSTRING_API int php_mb_set_zend_encoding(TSRMLS_D); -char* php_mb_encoding_detector(const char *string, int length, char *list - TSRMLS_DC); -int php_mb_encoding_converter(char **to, int *to_length, const char *from, - int from_length, const char *encoding_to, const char *encoding_from - TSRMLS_DC); -int php_mb_oddlen(const char *string, int length, const char *encoding TSRMLS_DC); -#endif /* ZEND_MULTIBYTE */ - #else /* HAVE_MBSTRING */ #define mbstring_module_ptr NULL diff --git a/ext/mbstring/oniguruma/COPYING b/ext/mbstring/oniguruma/COPYING index ed3fa53b25..4d321bb93b 100644 --- a/ext/mbstring/oniguruma/COPYING +++ b/ext/mbstring/oniguruma/COPYING @@ -1,4 +1,4 @@ -OniGuruma LICENSE +Oniguruma LICENSE ----------------- When this software is partly used or it is distributed with Ruby, @@ -6,7 +6,7 @@ this of Ruby follows the license of Ruby. It follows the BSD license in the case of the one except for it. /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without diff --git a/ext/mbstring/oniguruma/HISTORY b/ext/mbstring/oniguruma/HISTORY index c648c54551..a1debefa49 100644 --- a/ext/mbstring/oniguruma/HISTORY +++ b/ext/mbstring/oniguruma/HISTORY @@ -1,5 +1,457 @@ History +2007/08/16: Version 4.7.1 + +2007/08/16: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/07/04: [spec] (thanks K.Takata) + ONIG_OPTION_SINGLELINE: '$' -> '\Z' (as Perl) +2007/07/04: [dist] (thanks K.Takata) + fix documents API and API.ja. + +2007/06/18: Version 4.7.0 + +2007/06/18: [test] success in ruby 1.9.0 (2007-04-06) [i686-linux]. +2007/06/18: [bug] (thanks KUBO Takehiro) + WORD_ALIGNMENT_SIZE must be sizeof(OnigCodePoint). +2007/06/05: [impl] add #ifndef vsnprintf in regint.h. +2007/06/05: [bug] should check USE_CRNL_AS_LINE_TERMINATOR case + in onig_search(). + +2007/04/12: Version 4.6.2 + +2007/04/09: [impl] change STATE_CHECK_BUFF_MAX_SIZE value from 0x8000 + to 0x4000. +2007/03/26: [impl] add 'void' to function declarations. + +2007/03/06: Version 4.6.1 + +2007/03/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2007/03/06: [bug] add #include for bcc32. + (In bcc32, alloca() is declared in malloc.h.) +2007/03/06: [impl] remove including version.h of Ruby. +2007/03/02: [bug] invalid optimization for semi-end-buf in onig_search(). + ex. /\n\Z/.match("aaaaaaaaaa\n") +2007/03/02: [impl] move range > start check position in end_buf process. + +2007/02/08: Version 4.6.0 + +2007/02/08: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2007/01/09: [tune] select_opt_exact_info() didn't work for empty info. + ex. /.a/ make MAP info instead of EXACT info. +2006/12/29: [impl] add print_enc_string() for ONIG_DEBUG mode. +2006/12/22: [spec] should check too short multibyte char in parse_exp(). + add USE_PAD_TO_SHORT_BYTE_CHAR. + ex. /\x00/ in UTF16 should be error. 
+ +2006/11/17: Version 4.5.1 + +2006/11/17: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/15: [impl] remove CHECK_INTERRUPT. +2006/11/10: [bug] 0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e + should be [:punct:]. +2006/11/08: [impl] rename QUALIFIER -> QUANTIFIER. +2006/11/07: [bug] (thanks Byte) + add 0xa3 <=> 0xb3 to CaseFoldMap[] for KOI8-R. + +2006/11/06: Version 4.5.0 + +2006/11/06: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/11/06: [API] remove ONIGENC_AMBIGUOUS_MATCH_COMPOUND. +2006/11/06: [spec] change ONIG_OPTION_FIND_LONGEST to search all of + the string range. + add USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE. + +2006/10/30: Version 4.4.6 + +2006/10/30: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/10/30: [impl] (thanks K.Takata) + add THREAD_SYSTEM_INIT and THREAD_SYSTEM_END. +2006/10/30: [bug] (thanks Wolfgang Nadasi-Donner) + invalid offset value was used in STATE_CHECK_BUFF_INIT(). + +2006/10/24: Version 4.4.5 + +2006/10/24: [test] success in ruby 1.9.0 (2006-10-23) [i686-linux]. +2006/10/24: [impl] escape -Wall warning. +2006/10/24: [tune] (thanks Kornelius Kalnbach) + String#scan for long string needs long time compare with + old Ruby + by initialization time for combination explosion check + ex. ("test " * 100_000).scan(/\w*\s?/) + change STATE_CHECK_BUFF_MAX_SIZE from 0x8000000 to 0x8000. + reduce initialization area of state_check_buff. +2006/10/16: [bug] (thanks Akinori Musha) + first argument of rb_warn() should be format string. +2006/10/10: [impl] add msa.state_check_buff_size initialization + in onig_search(). +2006/10/10: [bug] should call onig_st_free_table() in + onig_free_shared_cclass_table(). +2006/10/10: [impl] remove OP_WORD_SB and OP_WORD_MB. +2006/09/29: [impl] initialize state_check_buff_size in STATE_CHECK_BUFF_INIT(). + make valgrind happy. +2006/09/22: [impl] convert to ascii for parameter string in + onig_error_code_to_str(). + add enc member into OnigErrorInfo. 
+ +2006/09/19: Version 4.4.4 + +2006/09/19: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/19: [impl] (thanks KOYAMA Tetsuji) + HAVE_STDARG_PROTOTYPES was not defined in Mac OS X + by Xcode 2.4(gcc 4.0.1) problem. [php-dev 1312] etc... + +2006/09/15: Version 4.4.3 + +2006/09/15: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/15: [bug] (thanks Allan Odgaard) + out of range access in bm_search_notrev(). + (p < s) + +2006/09/08: Version 4.4.2 + +2006/09/08: [test] success in ruby 1.9.0 (2006-08-22) [i686-linux]. +2006/09/08: [bug] (thanks K.Takata) + out of range access in bm_search_notrev(). +2006/09/04: [spec] (thanks K.Takata) + allow look-behind in negative look-behind. + ex. /(? (?:a*){n,n}, (?:a+){n,n} +2006/09/21: [impl] reduce (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} + if backreference is not used. +2006/08/17: [bug] should check scan_env.num_call > 0 for backrefed pattern + in combination explosion check. + +2006/08/17: Version 4.3.0 + +2006/08/17: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/17: [new] add config USE_COMBINATION_EXPLOSION_CHECK. + check /(.+)*/, /(\s*foo\s*)*/ etc... + [API] add num_comb_exp_check member in regex_t. + [dist] change LTVERSION value to "1:0:0" in configure.in. +2006/08/15: [bug] OP_REPEAT_INC process in match_at(). + should check repeat-count >= range-upper and + range-upper may be infinite. + +2006/08/11: Version 4.2.3 + +2006/08/11: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/08/10: [impl] remove double call in set_qualifier(). +2006/08/10: [impl] remove by_number member in QualifierNode. +2006/08/09: [impl] remove a comma at the end of enum ReduceType + for escape warning on Mac OS X. +2006/08/07: [impl] remove warning in regcomp.c. +2006/08/07: [spec] move definition of USE_BACKREF_AT_LEVEL into NOT_RUBY. + +2006/08/03: Version 4.2.2 + +2006/08/03: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. 
+2006/08/03: [bug] (thanks Hiroyuki Yamamoto) + segmentation fault in regexec(). (POSIX API) +2006/08/02: [bug] combination of \G in look-ahead/look-behind and other + anchors(\A, \z, \Z) cause invalid result. + ex. /(?!\G)a\z/.match("ba") + start arg. of MATCH_ARG_INIT() should be original + arg. of onig_search(). + +2006/07/31: Version 4.2.1 + +2006/07/31: [test] success in ruby 1.9.0 (2006-07-28) [i686-linux]. +2006/07/31: [bug] (thanks Kimura Minoru) + re-implement bm_search_notrev(). +2006/07/31: [impl] bm_search_notrev() refactoring. +2006/07/31: [bug] (thanks Kimura Minoru) + fix incomplete multibyte string in exact info. +2006/07/31: [impl] (thanks Seiji Masugata) + remove cast in va_init_list() for Intel C Compiler. + +2006/07/18: Version 4.2.0 + +2006/07/18: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/18: [new] (thanks Wolfgang Nadasi-Donner) + add back reference with nest level. + \k, \k +2006/07/11: [impl] change long to unsigned long for ONIG_OPTION_XXX + and ONIG_SYN_XXX number literals. + +2006/07/03: Version 4.1.2 + +2006/07/03: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/07/03: [spec] (thanks Wolfgang Nadasi-Donner) + allow \G in look-behind. + add ANCHOR_BEGIN_POSITION flag in setup_tree(). +2006/06/12: [impl] (thanks matz) + fix cast from char* to const char* + in onig_snprintf_with_pattern(). + fix cast from char* to const char* + for PopularQStr[] and ReduceQStr[]. + +2006/05/22: Version 4.1.1 + +2006/05/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/22: [impl] add position string argument to STACK_BASE_CHECK(). +2006/05/22: [bug] (thanks NARUSE, Yui) + add STK_NULL_CHECK_END to IS_TO_VOID_TARGET(). + ex. core dump in + /(?\(([^\(\)]++|\g)*+\))/.match('((a))') + +2006/05/15: Version 4.1.0 + +2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/15: [impl] thread atomic changes for onig_end() and + onig_free_node_list(). 
+2006/05/15: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2005/05/15: [dist] update API, API.ja, FAQ, FAQ.ja. +2006/05/15: [spec] remove onig_recompile(), onig_recompile_deluxe() + and re_recompile_pattern(). + add config USE_RECOMPILE_API. +2006/05/15: [impl] improved thread safe implementation of onig_search() + and onig_match(). + +2006/05/11: Version 4.0.4 + +2006/05/11: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/11: [bug] (thanks Yuji Kaneda) + dead-lock in onig_end(). +2006/05/11: [dist] update index.html. + +2006/05/08: Version 4.0.3 + +2006/05/08: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/05/08: [bug] (thanks Allan Odgaard) + Segmentation fault in backward search. + ex. /^\t.*$/ +2006/04/18: [dist] update index.html. +2006/04/05: [dist] update index.html. +2006/03/24: [dist] update doc/RE, doc/RE.ja. + +2006/03/23: Version 4.0.2 + +2006/03/22: [test] success in ruby 1.9.0 (2006-03-01) [i686-linux]. +2006/03/22: [impl] add both of ONIG_OPTION_DONT_CAPTURE_GROUP + and ONIG_OPTION_CAPTURE_GROUP check. +2006/03/22: [spec] add error code ONIGERR_INVALID_COMBINATION_OF_OPTIONS. +2006/03/22: [impl] remove USE_NAMED_GROUP condition from + ONIG_OPTION_DONT_CAPTURE_GROUP check in parse_effect(). +2006/03/22: [new] add API onig_noname_group_capture_is_active(). +2006/03/01: [spec] rename regex object type from regex_t to OnigRegexType. + add typedef OnigRegexType regex_t + unless ONIG_ESCAPE_REGEX_T_COLLISION is defined. +2006/02/27: [spec] change ONIG_MAX_MULTI_BYTE_RANGES_NUM from 1000 + to 10000. (for docdiff program) +2006/02/17: [dist] change COPYING year 2005 -> 2006. + +2006/02/07: Version 4.0.1 + +2006/02/07: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/02/07: [bug] memory leaks in onig_free_shared_cclass_table(). +2006/02/03: [ruby] add -m 0644 option to install command in "make 19". +2006/02/03: [impl] rename ANCHOR_ANYCHAR_STAR_PL to ANCHOR_ANYCHAR_STAR_ML. 
+ change from IS_POSIXLINE() to IS_MULTILINE() + for ANCHOR_ANYCHAR_START/_ML decision + in optimize_node_left(). +2006/01/26: [dist] update index.html for Oniguruma 2.5.3. +2006/01/25: [dist] update URL in index.html. + +2006/01/24: Version 4.0.0 + +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i386-cygwin]. +2006/01/24: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2006/01/24: [dist] remove warnings from sample/encode.c. +2006/01/24: [dist] change install description in README(.ja). +2006/01/24: [dist] remove re.c.XXX.patch from distribution and CVS. +2006/01/24: [dist] --- support shared library --- + use GNU libtool/automake. + change configure.in and add Makefile.am, sample/Makefile.am. + add AUTHORS file. +2006/01/24: [dist] test programs return exit code -1 when test fails. +2006/01/24: [bug] (thanks KIMURA Koichi) + invalid syntax definition in ONIG_SYNTAX_GREP. + ONIG_SYN_OP_BRACE_INTERVAL + -> ONIG_SYN_OP_ESC_BRACE_INTERVAL +2006/01/23: [dist] fix configure.in for onig-config. +2006/01/19: [new] add new config USE_UNICODE_ALL_LINE_TERMINATORS. + (U+000d, U+0085, U+2028, U+2029) +2005/12/29: [dist] change pmatch array size to 25 in testconv.rb. +2005/12/26: [dist] fix name in test.rb. +2005/12/26: [dist] update index.html for 2.5.1. + +2005/11/29: Version 3.9.1 + +2005/11/29: [test] success in ruby 1.9.0 (2005-11-28) [i686-linux]. +2005/11/24: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/11/21: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/21: [bug] (thanks Allan Odgaard) + utf-8 character comments in extended mode leads + invalid result. + ex. /(?x)(?<= # o\n~) / + fix onigenc_unicode_is_code_ctype() and + utf8_is_code_ctype(). +2005/11/20: [bug] (thanks MATSUMOTO Satoshi) (thanks Isao Sonobe) + begin-line anchor and BM search optimization leads + invalid result in UTF-16/32. + fix in set_optimize_exact_info(). 
+ +2005/11/20: Version 3.9.0 + +2005/11/20: [test] success in ruby 1.9.0 (2005-11-20) [i386-cygwin]. +2005/11/20: [test] success in ruby 1.9.0 (2005-10-18) [i386-cygwin]. +2005/11/20: [new] add new config USE_CRNL_AS_LINE_TERMINATOR. + (!!! NO SUPPORT experimental option !!!) +2005/11/15: [bug] (thanks Allan Odgaard) + tok->escape was not cleared in fetch_token_in_cc(). + ex. [\s&&[^\n]] makes wrong result. +2005/10/18: [impl] (thanks nobu) + change sjis_mbc_enc_len() + and node_new_cclass_by_codepoint_range() scope to static. +2005/09/05: [dist] remove link to MultiFind. +2005/09/01: [dist] add link to yagrep. + +2005/08/23: Version 3.8.9 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [inst] fix Makefile.in for make ctest/ptest. + +2005/08/23: Version 3.8.8 + +2005/08/23: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/23: [impl] split is_code_in_cc() from onig_is_code_in_cc(). +2005/08/23: [impl] should check DATA_ENSURE() at OP_CCLASS_NODE in match_at(). +2005/08/23: [impl] (thanks akr) + add ONIG_OPTION_MAXBIT for escape conflict with + Ruby's option. +2005/08/22: [impl] escape GCC 4.0 warnings for testc.c. +2005/08/22: [bug] (thanks nobu, matz) [ruby-dev:26840] + UTF-8 0xFE, 0xFF handling bug in code_is_in_cclass_node(). + abort on /\S*/ =~ "\xfe" +2005/08/22: [impl] escape GCC 4.0 warnings for sample/*.c. +2005/08/22: [impl] fix testconvu.rb. +2005/08/22: [impl] escape GCC 4.0 warnings. + +2005/08/09: Version 3.8.7 + +2005/08/09: [test] success in ruby 1.9.0 (2005-08-09) [i686-linux]. +2005/08/09: [bug] (thanks Allan Odgaard) + should not call enc_len() for s == range + in onig_search(). +2005/08/01: [dist] add mkdir $prefix, mkdir $exec_prefix to make install. + +2005/07/27: Version 3.8.6 + +2005/07/27: [test] success in ruby 1.9.0 (2005-07-26) [i686-linux]. +2005/07/27: [impl] update onig-config.in. +2005/07/26: [new] (thanks Yen-Ju Chen) + add Oniguruma configuration check program. 
+ (onig-config.in) + +2005/07/14: Version 3.8.5 + +2005/07/14: [test] success in ruby 1.9.0 (2005-07-14) [i686-linux]. +2005/07/11: [test] success in ruby 1.9.0 (2005-07-04) [i686-linux]. +2005/07/11: [bug] (thanks nobu) [ruby-dev:26505] + invalid handling for /\c\x/ and /\C-\x/. + fix fetch_escaped_value(). +2005/07/05: [impl] (thanks Alexey Zakhlestine) + escape GCC 4.0 warnings. + +2005/07/01: Version 3.8.4 + +2005/07/01: [test] success in ruby 1.9.0 (2005-07-01) [i686-linux]. +2005/06/30: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/30: [dist] add GB 18030 test to sample/encode.c. +2005/06/30: [impl] escape warning of gb18030_left_adjust_char_head(). +2005/06/30: [new] (contributed by KUBO Takehiro) + add new character encoding ONIG_ENCODING_GB18030. +2005/06/30: [bug] invalid ctype check for multibyte encodings. + ("graph", "print") + fix onigenc_mb2/4_is_code_ctype(), + eucjp_is_code_ctype() and sjis_is_code_ctype(). +2005/06/30: [bug] invalid conversion from code point to mbc in + onigenc_mb4_code_to_mbc(). + +2005/06/28: Version 3.8.3 + +2005/06/28: [test] success in ruby 1.9.0 (2005-06-28) [i686-linux]. +2005/06/27: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/06/27: [bug] (thanks Wolfgang Nadasi-Donner) + invalid check for never ending recursion. + lower zero quantifier should be treated as + a non-recursive call alternative. + ex. /(?[^()]*(\(\g\)[^()]*)*)/ +2005/06/15: [impl] add divide_ambig_string_node_sub(). +2005/06/15: [dist] add a test to sample/encode.c. +2005/06/10: [new] add ONIG_SYNTAX_PERL_NG. (Perl + named group) + +2005/06/01: Version 3.8.2 + +2005/06/01: [test] success in ruby 1.9.0 (2005-05-31) [i686-linux]. +2005/05/31: [dist] add doc/FAQ and doc/FAQ.ja. +2005/05/31: [impl] minor change in node_new(). +2005/05/30: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/30: [bug] (thanks Allan Odgaard) + FreeNodeList null check should be on thread-atomic + in node_new(). 
+ +2005/05/11: Version 3.8.1 + +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i386-mswin32]. +2005/05/11: [dist] update win32/Makefile (make 19). +2005/05/11: [test] success in ruby 1.9.0 (2005-05-11) [i686-linux]. +2005/05/06: [test] success in ruby 1.9.0 (2005-05-06) [i686-linux]. +2005/05/06: [impl] (thanks nobu) [ruby-core:4815] + add #ifdef USE_VARIABLE_META_CHARS to goto label. +2005/04/25: [test] success in ruby 1.9.0 (2005-04-25) [i686-linux]. +2005/04/25: [impl] change DEFAULT_WARN_FUNCTION and DEFAULT_VERB_WARN_FUNCTION + to onig_rb_warn() and onig_rb_warning(). + +2005/04/15: Version 3.8.0 + +2005/04/15: [test] success in ruby 1.9.0 (2005-04-14) [i686-linux]. +2005/04/01: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. +2005/04/01: [impl] (thanks Joe Orton) + (thanks Moriyoshi Koizumi) + many const-ification to many *.[ch] files. + +2005/03/25: Version 3.7.2 + +2005/03/25: [test] success in ruby 1.9.0 (2005-03-24) [i686-linux]. +2005/03/23: [test] success in ruby 1.9.0 (2005-03-20) [i686-linux]. +2005/03/23: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/23: [new] add ONIG_SYNTAX_ASIS. +2005/03/23: [new] add ONIG_SYN_OP2_INEFFECTIVE_ESCAPE. +2005/03/09: [spec] rename MBCTYPE_XXX to RE_MBCTYPE_XXX. (GNU API) +2005/03/08: [test] success in ruby 1.9.0 (2005-03-08) [i686-linux]. +2005/03/08: [impl] (thanks matz) [ruby-dev:25783] + should not allocate memory for key data in st.c. + move st_*_strend() functions from st.c. fixed some + potential memory leaks. + (imported from Ruby 1.9 2005-03-08) + 2005/03/07: Version 3.7.1 2005/03/07: [test] success in ruby 1.9.0 (2005-03-07) [i686-linux]. @@ -24,7 +476,7 @@ History remove reggnu.c from make 19. 2005/02/19: [dist] update doc/API and doc/API.ja. 2005/02/19: [test] success in ruby 1.9.0 (2005-02-19) [i386-cygwin]. -2005/02/19: [impl] (thanks Alexey Zakhlestin) +2005/02/19: [impl] (thanks Alexey Zakhlestine) change UChar* to const UChar* in oniguruma.h, regenc.h and regparse.h. 
2005/02/13: [impl] change UChar* to const UChar* in oniguruma.h and @@ -1358,16 +1810,29 @@ History [test: test] [memo: memo] -- - -svn mkdir http://localhost/repos/branches -m "" -svn mkdir http://localhost/repos/branches/oniguruma -m "" -svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/branches/oniguruma/2.X -m "branch for 8-bit encodings only" - - -svn copy http://localhost/repos/trunk/oniguruma http://localhost/repos/tags/oniguruma/X.X.X -m "onigdXXXXXXXX" - - + cvs history -T - + cvs rtag "VERSION_X_X_X" oniguruma + + + +* write Makefile.am and configure.in. +> aclocal +> libtoolize +> automake --foreign --add-missing +> autoconf +> configure --with-rubydir=... CFLAGS="-O2 -Wall" + + + + + VERSION = current:revision:age + + current: interface number (from 0) + revision: implementation number of same interface (from 0) + age: number of supported previous interfaces + (if current only supported then age == 0) + +//END diff --git a/ext/mbstring/oniguruma/README b/ext/mbstring/oniguruma/README index dc4fb3b64b..dff7fba562 100644 --- a/ext/mbstring/oniguruma/README +++ b/ext/mbstring/oniguruma/README @@ -1,9 +1,8 @@ -README 2005/02/04 +README 2007/06/18 Oniguruma ---- (C) K.Kosako http://www.geocities.jp/kosako3/oniguruma/ -http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ Oniguruma is a regular expressions library. @@ -14,11 +13,12 @@ Supported character encodings: ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, KOI8-R, KOI8 (*), + Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 +* GB 18030: contributed by KUBO Takehiro * KOI8 is not included in library archive by default setup. (need to edit Makefile if you want to use it.) 
------------------------------------------------------------ @@ -31,15 +31,20 @@ Install 2. make 3. make install - library file: libonig.a + * uninstall - test (ASCII/EUC-JP) + make uninstall - make ctest + * test (ASCII/EUC-JP) - uninstall + make atest - make uninstall + * configuration check + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix @@ -73,8 +78,21 @@ Regular Expressions Usage - Include oniguruma.h in your program. (native API) - See doc/API for native API. + Include oniguruma.h in your program. (Oniguruma API) + See doc/API for Oniguruma API. + + If you want to disable UChar type (== unsigned char) definition + in oniguruma.h, define ONIG_ESCAPE_UCHAR_COLLISION and then + include oniguruma.h. + + If you want to disable regex_t type definition in oniguruma.h, + define ONIG_ESCAPE_REGEX_T_COLLISION and then include oniguruma.h. + + Example of the compiling/linking command line in Unix or Cygwin, + (prefix == /usr/local case) + + cc sample.c -L/usr/local/lib -lonig + If you want to use static link library(onig_s.lib) in Win32, add option -DONIG_EXTERN=extern to C compiler. @@ -83,19 +101,20 @@ Usage Sample Programs - sample/simple.c example of the minimum (native API) + sample/simple.c example of the minimum (Oniguruma API) sample/names.c example of the named group callback. sample/encode.c example of some encodings. sample/listcap.c example of the capture history. sample/posix.c POSIX API sample. sample/sql.c example of the variable meta characters. (SQL-like pattern matching) - sample/syntax.c Perl and Java syntax test. + sample/syntax.c Perl, Java and ASIS syntax test. Source Files oniguruma.h Oniguruma API header file. (public) + onig-config.in configuration check program template. regenc.h character encodings framework header file. regint.h internal definitions @@ -125,9 +144,10 @@ Source Files enc/euc_tw.c EUC-TW encoding. enc/euc_kr.c EUC-KR, EUC-CN encoding. enc/sjis.c Shift_JIS encoding. 
- enc/big5.c Big5 encoding. - enc/koi8.c KOI8 encoding. - enc/koi8_r.c KOI8-R encoding. + enc/big5.c Big5 encoding. + enc/gb18030.c GB 18030 encoding (contributed by KUBO Takehiro) + enc/koi8.c KOI8 encoding. + enc/koi8_r.c KOI8-R encoding. enc/iso8859_1.c ISO-8859-1 encoding. (Latin-1) enc/iso8859_2.c ISO-8859-2 encoding. (Latin-2) enc/iso8859_3.c ISO-8859-3 encoding. (Latin-3) @@ -159,23 +179,11 @@ Source Files API differences with Japanized GNU regex(version 0.12) of Ruby 1.8/1.6 + re_compile_fastmap() is removed. - + re_recompile_pattern() is added. + re_alloc_pattern() is added. -ToDo - - ? ignore case in full code point range of Unicode. - ? Unicode Property. - ? ambig-flag Katakana <-> Hiragana. - ? add ONIG_OPTION_NOTBOS/NOTEOS. (\A, \z, \Z) - ? add ONIG_SYNTAX_ASIS. - ?? \X (== \PM\pM*) - ?? implement syntax behavior ONIG_SYN_CONTEXT_INDEP_ANCHORS. - ?? variable line separator. - ?? transmission stopper. (return ONIG_STOP from match_at()) -and I'm thankful to Akinori MUSHA. +I'm thankful to Akinori MUSHA. 
Mail Address: K.Kosako diff --git a/ext/mbstring/oniguruma/README.ja b/ext/mbstring/oniguruma/README.ja index 44553abfef..2dee793cae 100644 --- a/ext/mbstring/oniguruma/README.ja +++ b/ext/mbstring/oniguruma/README.ja @@ -1,9 +1,8 @@ -README.ja 2005/02/04 +README.ja 2007/06/18 µ´¼Ö ---- (C) K.Kosako http://www.geocities.jp/kosako3/oniguruma/ -http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/oniguruma/ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ µ´¼Ö¤ÏÀµµ¬É½¸½¥é¥¤¥Ö¥é¥ê¤Ç¤¢¤ë¡£ @@ -14,11 +13,12 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE, EUC-JP, EUC-TW, EUC-KR, EUC-CN, - Shift_JIS, Big5, KOI8-R, KOI8 (*), + Shift_JIS, Big5, GB 18030, KOI8-R, KOI8, ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10, ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 +* GB 18030: µ×ÊÝ·òÍλáÄó¶¡ * KOI8¤Ï¥Ç¥Õ¥©¥ë¥È¤Î¥»¥Ã¥È¥¢¥Ã¥×¤Ç¤Ï¥é¥¤¥Ö¥é¥ê¤ÎÃæ¤Ë´Þ¤Þ¤ì¤Ê¤¤¡£ (ɬÍפǤ¢¤ì¤ÐMakefile¤òÊÔ½¸¤¹¤ë¤³¤È) ------------------------------------------------------------ @@ -31,15 +31,21 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ 2. make 3. 
make install - ¥é¥¤¥Ö¥é¥ê¥Õ¥¡¥¤¥ë: libonig.a + ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë + + make uninstall ưºî¥Æ¥¹¥È (ASCII/EUC-JP) - make ctest + make atest - ¥¢¥ó¥¤¥ó¥¹¥È¡¼¥ë - make uninstall + ¹½À®³Îǧ + + onig-config --cflags + onig-config --libs + onig-config --prefix + onig-config --exec-prefix @@ -71,8 +77,28 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ »ÈÍÑÊýË¡ - »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Native API¤Î¾ì¹ç)¡£ - Native API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£ + »ÈÍѤ¹¤ë¥×¥í¥°¥é¥à¤Ç¡¢oniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É¤¹¤ë(Oniguruma API¤Î¾ì¹ç)¡£ + Oniguruma API¤Ë¤Ä¤¤¤Æ¤Ï¡¢doc/API.ja¤ò»²¾È¡£ + + oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾UChar(== unsigned char)¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç + ¤Ë¤Ï¡¢ONIG_ESCAPE_UCHAR_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É + ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤ÏUChar¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigUChar¤È¤¤¤¦Ì¾Á°¤ÎÄêµÁ¤Î¤ß¤¬ + Í­¸ú¤Ë¤Ê¤ë¡£ + + oniguruma.h¤ÇÄêµÁ¤µ¤ì¤Æ¤¤¤ë·¿Ì¾regex_t¤ò̵¸ú¤Ë¤·¤¿¤¤¾ì¹ç¤Ë¤Ï¡¢ + ONIG_ESCAPE_REGEX_T_COLLISION¤òdefine¤·¤Æ¤«¤éoniguruma.h¤ò¥¤¥ó¥¯¥ë¡¼¥É + ¤¹¤ë¤³¤È¡£¤³¤Î¤È¤­¤Ë¤Ïregex_t¤ÏÄêµÁ¤µ¤ì¤º¡¢OnigRegexType, OnigRegex¤È¤¤¤¦ + ̾Á°¤ÎÄêµÁ¤Î¤ß¤¬Í­¸ú¤Ë¤Ê¤ë¡£ + + Unix/Cygwin¾å¤Ç¥³¥ó¥Ñ¥¤¥ë¡¢¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤ÎÎã¡§ + (prefix¤¬/usr/local¤Î¤È¤­) + cc sample.c -L/usr/local/lib -lonig + + GNU libtool¤ò»ÈÍѤ·¤Æ¤¤¤ë¤Î¤Ç¡¢¥×¥é¥Ã¥È¥Õ¥©¡¼¥à¤¬¶¦Í­¥é¥¤¥Ö¥é¥ê¤ò¥µ¥Ý¡¼¥È¤·¤Æ + ¤¤¤ì¤Ð¡¢»ÈÍѤǤ­¤ë¤è¤¦¤Ë¤Ê¤Ã¤Æ¤¤¤ë¡£ + ÀÅۥ饤¥Ö¥é¥ê¤È¶¦Í­¥é¥¤¥Ö¥é¥ê¤Î¤É¤Á¤é¤ò»ÈÍѤ¹¤ë¤«¤ò»ØÄꤹ¤ëÊýË¡¡¢¼Â¹Ô»þÅÀ¤Ç¤Î + ´Ä¶­ÀßÄêÊýË¡¤Ë¤Ä¤Æ¤Ï¡¢¼«Ê¬¤ÇÄ´¤Ù¤Æ²¼¤µ¤¤¡£ + Win32¤Ç¥¹¥¿¥Æ¥£¥Ã¥¯¥ê¥ó¥¯¥é¥¤¥Ö¥é¥ê(onig_s.lib)¤ò¥ê¥ó¥¯¤¹¤ë¾ì¹ç¤Ë¤Ï¡¢ ¥³¥ó¥Ñ¥¤¥ë¤¹¤ë¤È¤­¤Ë -DONIG_EXTERN=extern ¤ò¥³¥ó¥Ñ¥¤¥ë°ú¿ô¤ËÄɲ乤뤳¤È¡£ @@ -80,18 +106,19 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ »ÈÍÑÎã¥×¥í¥°¥é¥à - sample/simple.c ºÇ¾®Îã (native API) + sample/simple.c ºÇ¾®Îã (Oniguruma API) sample/names.c ̾Á°ÉÕ¤­¥°¥ë¡¼¥×¥³¡¼¥ë¥Ð¥Ã¥¯»ÈÍÑÎã sample/encode.c ´ö¤Ä¤«¤Îʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°»ÈÍÑÎã sample/listcap.c Êá³ÍÍúÎòµ¡Ç½¤Î»ÈÍÑÎã sample/posix.c POSIX API»ÈÍÑÎã sample/sql.c ²ÄÊѥ᥿ʸ»úµ¡Ç½»ÈÍÑÎã (SQL-like ¥Ñ¥¿¡¼¥ó) - 
sample/syntax.c Perl¤ÈJavaʸˡ¤Î¥Æ¥¹¥È + sample/syntax.c Perl¡¢Java¡¢ASISʸˡ¤Î¥Æ¥¹¥È ¥½¡¼¥¹¥Õ¥¡¥¤¥ë oniguruma.h µ´¼ÖAPI¥Ø¥Ã¥À (¸ø³«) + onig-config.in onig-config¥×¥í¥°¥é¥à ¥Æ¥ó¥×¥ì¡¼¥È regenc.h ʸ»ú¥¨¥ó¥³¡¼¥Ç¥£¥ó¥°ÏÈÁȤߥإåÀ regint.h ÆâÉôÀë¸À @@ -122,6 +149,7 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ enc/euc_kr.c EUC-KR, EUC-CN ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/sjis.c Shift_JIS ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/big5.c Big5 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° + enc/gb18030.c GB 18030 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° (µ×ÊÝ·òÍλá Äó¶¡) enc/koi8.c KOI8 ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/koi8_r.c KOI8-R ¥¨¥ó¥³¡¼¥Ç¥£¥ó¥° enc/iso8859_1.c ISO-8859-1 (Latin-1) @@ -155,23 +183,10 @@ http://www.freebsd.org/cgi/cvsweb.cgi/ports/devel/oniguruma/ Ruby 1.8/1.6¤ÎÆüËܸ첽GNU regex¤È¤ÎAPI¤Î°ã¤¤ + re_compile_fastmap() ¤Ïºï½ü¤µ¤ì¤¿¡£ - + re_recompile_pattern() ¤¬Äɲ䵤줿¡£ + re_alloc_pattern() ¤¬Äɲ䵤줿¡£ -»Ä·ï - - ? UnicodeÁ´¥³¡¼¥É¥Ý¥¤¥ó¥ÈÎΰè¤Ç¤ÎÂçʸ»ú¾®Ê¸»ú¾È¹ç - ? Unicode¥×¥í¥Ñ¥Æ¥£ - ? ambig-flag Katakana <-> Hiragana - ? ONIG_OPTION_NOTBOS/NOTEOSÄɲà (\A, \z, \Z) - ? ONIG_SYNTAX_ASISÄɲà - ?? \X (== \PM\pM*) - ?? ʸˡÍ×ÁÇ ONIG_SYN_CONTEXT_INDEP_ANCHORS¤Î¼ÂÁõ - ?? ²þ¹Ôʸ»ú(ʸ»úÎó)¤òÊѹ¹¤Ç¤­¤ë - ?? ¸¡º÷°ÌÃÖ°ÜÆ°Ää»ß±é»»»Ò (match_at()¤«¤éONIG_STOP¤òÊÖ¤¹) - -and I'm thankful to Akinori MUSHA. +I'm thankful to Akinori MUSHA. -Mail Address: K.Kosako +¥¢¥É¥ì¥¹: K.Kosako diff --git a/ext/mbstring/oniguruma/config.h.in b/ext/mbstring/oniguruma/config.h.in index 5ca2056fb3..4a2fc28d82 100644 --- a/ext/mbstring/oniguruma/config.h.in +++ b/ext/mbstring/oniguruma/config.h.in @@ -1,69 +1,108 @@ -/* config.h.in. Generated automatically from configure.in by autoheader. */ +/* config.h.in. Generated from configure.in by autoheader. */ -/* Define if using alloca.c. */ -#undef C_ALLOCA - -/* Define to empty if the keyword does not work. */ -#undef const - -/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. - This function is required for alloca.c support on those systems. 
*/ +/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP + systems. This function is required for `alloca.c' support on those systems. + */ #undef CRAY_STACKSEG_END -/* Define if you have alloca, as a function or macro. */ +/* Define to 1 if using `alloca.c'. */ +#undef C_ALLOCA + +/* Define to 1 if you have `alloca', as a function or macro. */ #undef HAVE_ALLOCA -/* Define if you have <alloca.h> and it should be used (not on Ultrix). */ +/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix). + */ #undef HAVE_ALLOCA_H -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at run-time. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown - */ -#undef STACK_DIRECTION +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H -/* Define if you have the ANSI C header files. */ -#undef STDC_HEADERS +/* Define to 1 if you have the <inttypes.h> header file. */ +#undef HAVE_INTTYPES_H -/* Define if you can safely include both <sys/time.h> and <time.h>. */ -#undef TIME_WITH_SYS_TIME +/* Define to 1 if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H -/* The number of bytes in a int. */ -#undef SIZEOF_INT +/* Define if compiler supports prototypes */ +#undef HAVE_PROTOTYPES -/* The number of bytes in a long. */ -#undef SIZEOF_LONG +/* Define if compiler supports stdarg prototypes */ +#undef HAVE_STDARG_PROTOTYPES -/* The number of bytes in a short. */ -#undef SIZEOF_SHORT +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H -/* Define if you have the <stdlib.h> header file. */ +/* Define to 1 if you have the <stdlib.h> header file. */ #undef HAVE_STDLIB_H -/* Define if you have the <string.h> header file. */ +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file.
*/ #undef HAVE_STRING_H -/* Define if you have the <strings.h> header file. */ -#undef HAVE_STRINGS_H +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H -/* Define if you have the <sys/types.h> header file. */ -#undef HAVE_SYS_TYPES_H +/* Define to 1 if you have the <sys/times.h> header file. */ +#undef HAVE_SYS_TIMES_H -/* Define if you have the <sys/time.h> header file. */ +/* Define to 1 if you have the <sys/time.h> header file. */ #undef HAVE_SYS_TIME_H -/* Define if you have the <sys/times.h> header file. */ -#undef HAVE_SYS_TIMES_H +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H -/* Define if you have the <unistd.h> header file. */ +/* Define to 1 if you have the <unistd.h> header file. */ #undef HAVE_UNISTD_H -/* Define if you have the function argument prototype */ -#undef HAVE_PROTOTYPES +/* Name of package */ +#undef PACKAGE -/* Define if you have the variable length prototypes and stdarg.h */ -#undef HAVE_STDARG_PROTOTYPES +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the version of this package. */ +#undef PACKAGE_VERSION + +/* The size of a `int', as computed by sizeof. */ +#undef SIZEOF_INT + +/* The size of a `long', as computed by sizeof. */ +#undef SIZEOF_LONG + +/* The size of a `short', as computed by sizeof. */ +#undef SIZEOF_SHORT +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at run-time. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ +#undef STACK_DIRECTION + +/* Define to 1 if you have the ANSI C header files.
*/ +#undef STDC_HEADERS + +/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ +#undef TIME_WITH_SYS_TIME + +/* Define if combination explosion check */ +#undef USE_COMBINATION_EXPLOSION_CHECK + +/* Version number of package */ +#undef VERSION + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const diff --git a/ext/mbstring/oniguruma/enc/big5.c b/ext/mbstring/oniguruma/enc/big5.c index 763872e963..86792666a4 100644 --- a/ext/mbstring/oniguruma/enc/big5.c +++ b/ext/mbstring/oniguruma/enc/big5.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_BIG5[] = { +static const int EncLen_BIG5[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/oniguruma/enc/euc_jp.c b/ext/mbstring/oniguruma/enc/euc_jp.c index 5f13e33eb4..71c81ee9fe 100644 --- a/ext/mbstring/oniguruma/enc/euc_jp.c +++ b/ext/mbstring/oniguruma/enc/euc_jp.c @@ -31,7 +31,7 @@ #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) -static int EncLen_EUCJP[] = { +static const int EncLen_EUCJP[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -158,20 +158,16 @@ eucjp_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) static int eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (eucjp_code_to_mbclen(code) > 1 ?
TRUE : FALSE); - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; + } } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } static UChar* diff --git a/ext/mbstring/oniguruma/enc/euc_kr.c b/ext/mbstring/oniguruma/enc/euc_kr.c index c1e83b7e66..57bf801536 100644 --- a/ext/mbstring/oniguruma/enc/euc_kr.c +++ b/ext/mbstring/oniguruma/enc/euc_kr.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_EUCKR[] = { +static const int EncLen_EUCKR[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/oniguruma/enc/euc_tw.c b/ext/mbstring/oniguruma/enc/euc_tw.c index 4e5851a451..6f396e75e6 100644 --- a/ext/mbstring/oniguruma/enc/euc_tw.c +++ b/ext/mbstring/oniguruma/enc/euc_tw.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_EUCTW[] = { +static const int EncLen_EUCTW[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/ext/mbstring/oniguruma/enc/iso8859_1.c b/ext/mbstring/oniguruma/enc/iso8859_1.c index 53ad52ee13..5646f26c10 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_1.c +++ b/ext/mbstring/oniguruma/enc/iso8859_1.c @@ -2,7 +2,7 @@ iso8859_1.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,23 +32,23 @@ #define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \ ((EncISO_8859_1_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_1_CtypeTable[256] = { +static const unsigned short EncISO_8859_1_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -72,16 +72,6 @@ iso_8859_1_mbc_to_normalize(OnigAmbigType flag, 
const UChar** pp, const UChar* e { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -101,22 +91,6 @@ iso_8859_1_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -153,8 +127,7 @@ OnigEncodingType OnigEncodingISO_8859_1 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_10.c b/ext/mbstring/oniguruma/enc/iso8859_10.c index a9331cebf3..8081ef8010 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_10.c +++ b/ext/mbstring/oniguruma/enc/iso8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_10_CTYPE(code,ctype) \ ((EncISO_8859_10_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_10_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_10_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_10_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_10_CtypeTable[256] = { +static const unsigned short EncISO_8859_10_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_10_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_10_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -186,9 +160,9 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_10_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa2, 0xb2 }, { 0xa3, 0xb3 }, @@ -302,8 +276,7 @@ OnigEncodingType OnigEncodingISO_8859_10 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* 
esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_11.c b/ext/mbstring/oniguruma/enc/iso8859_11.c index bb1098807a..de9bb3b825 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_11.c +++ b/ext/mbstring/oniguruma/enc/iso8859_11.c @@ -32,23 +32,23 @@ #define ENC_IS_ISO_8859_11_CTYPE(code,ctype) \ ((EncISO_8859_11_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_11_CtypeTable[256] = { +static const unsigned short EncISO_8859_11_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 
0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_13.c b/ext/mbstring/oniguruma/enc/iso8859_13.c index 827ca508e8..69316edfc3 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_13.c +++ b/ext/mbstring/oniguruma/enc/iso8859_13.c @@ -2,7 +2,7 @@ iso8859_13.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_13_CTYPE(code,ctype) \ ((EncISO_8859_13_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_13_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_13_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_13_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_13_CtypeTable[256] = { +static const unsigned short EncISO_8859_13_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 
0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static unsigned short EncISO_8859_13_CtypeTable[256] = { }; static int -iso_8859_13_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_13_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & 
ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_13_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return ENC_IS_ISO_8859_13_CTYPE(code, ctype); @@ -185,74 +158,73 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_13_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, + static const OnigPairAmbigCodes cc[] = { + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa 
}, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde } - }; + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde } + }; if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { *ccs = OnigAsciiPairAmbigCodes; @@ -272,8 +244,7 @@ OnigEncodingType OnigEncodingISO_8859_13 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ @@ -286,11 +257,11 @@ OnigEncodingType OnigEncodingISO_8859_13 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_13_mbc_to_normalize, - iso_8859_13_is_mbc_ambiguous, - iso_8859_13_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_13_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_14.c b/ext/mbstring/oniguruma/enc/iso8859_14.c index 4fe5ab29d1..44638cf13a 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_14.c +++ b/ext/mbstring/oniguruma/enc/iso8859_14.c @@ -2,7 +2,7 @@ iso8859_14.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_14_CTYPE(code,ctype) \ ((EncISO_8859_14_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_14_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_14_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_14_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_14_CtypeTable[256] = { +static const unsigned short EncISO_8859_14_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static unsigned short EncISO_8859_14_CtypeTable[256] = { }; static int -iso_8859_14_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_14_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_14_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return 
ENC_IS_ISO_8859_14_CTYPE(code, ctype); @@ -185,103 +158,102 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_14_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa4, 0xa5 }, - { 0xa5, 0xa4 }, - { 0xa6, 0xab }, - { 0xa8, 0xb8 }, - { 0xaa, 0xba }, - { 0xab, 0xa6 }, - { 0xac, 0xbc }, - { 0xaf, 0xff }, + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xa2 }, + { 0xa2, 0xa1 }, + { 0xa4, 0xa5 }, + { 0xa5, 0xa4 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xab, 0xa6 }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, - { 0xb0, 0xb1 }, - { 0xb1, 0xb0 }, - { 0xb2, 0xb3 }, - { 0xb3, 0xb2 }, - { 0xb4, 0xb5 }, - { 0xb5, 0xb4 }, - { 0xb7, 0xb9 }, - { 0xb8, 0xa8 }, - { 0xb9, 0xb7 }, - { 0xba, 0xaa }, - { 0xbb, 0xbf }, - { 0xbc, 0xac }, - { 0xbd, 0xbe }, - { 0xbe, 0xbd }, - { 0xbf, 0xbb }, + { 0xb0, 0xb1 }, + { 0xb1, 0xb0 }, + { 0xb2, 0xb3 }, + { 0xb3, 0xb2 }, + { 0xb4, 0xb5 }, + { 0xb5, 0xb4 }, + { 0xb7, 0xb9 }, + { 0xb8, 0xa8 }, + { 0xb9, 0xb7 }, + { 0xba, 0xaa }, + { 0xbb, 0xbf }, + { 0xbc, 0xac }, + { 0xbd, 0xbe }, + { 0xbe, 0xbd }, + { 0xbf, 0xbb }, - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 
0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xaf } + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xaf } }; if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { @@ -302,8 +274,7 @@ OnigEncodingType OnigEncodingISO_8859_14 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ @@ -316,11 +287,11 @@ OnigEncodingType OnigEncodingISO_8859_14 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_14_mbc_to_normalize, - iso_8859_14_is_mbc_ambiguous, - iso_8859_14_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_14_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_15.c b/ext/mbstring/oniguruma/enc/iso8859_15.c index 1a8bd7b4c5..f643b895df 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_15.c +++ b/ext/mbstring/oniguruma/enc/iso8859_15.c @@ -2,7 +2,7 @@ iso8859_15.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_15_CTYPE(code,ctype) \ ((EncISO_8859_15_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_15_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_15_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_15_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_15_CtypeTable[256] = { +static const unsigned short EncISO_8859_15_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static unsigned short EncISO_8859_15_CtypeTable[256] = { }; static int -iso_8859_15_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_15_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_15_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return 
ENC_IS_ISO_8859_15_CTYPE(code, ctype); @@ -185,10 +158,10 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_15_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa6, 0xa8 }, { 0xa8, 0xa6 }, @@ -282,8 +255,7 @@ OnigEncodingType OnigEncodingISO_8859_15 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ @@ -296,11 +268,11 @@ OnigEncodingType OnigEncodingISO_8859_15 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_15_mbc_to_normalize, - iso_8859_15_is_mbc_ambiguous, - iso_8859_15_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_15_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_16.c b/ext/mbstring/oniguruma/enc/iso8859_16.c index e283db17cc..921ae36d9d 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_16.c +++ b/ext/mbstring/oniguruma/enc/iso8859_16.c @@ -2,7 +2,7 @@ iso8859_16.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_16_CTYPE(code,ctype) \ ((EncISO_8859_16_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_16_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_16_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_16_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_16_CtypeTable[256] = { +static const unsigned short EncISO_8859_16_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -104,21 +104,11 @@ static unsigned short EncISO_8859_16_CtypeTable[256] = { }; static int -iso_8859_16_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) +mbc_to_normalize(OnigAmbigType flag, + const UChar** pp, const UChar* end, UChar* lower) { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -133,27 +123,10 @@ iso_8859_16_mbc_to_normalize(OnigAmbigType flag, } static int -iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -176,7 +149,7 @@ iso_8859_16_is_mbc_ambiguous(OnigAmbigType flag, } static int -iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype) +is_code_ctype(OnigCodePoint code, unsigned int ctype) { if (code < 256) return 
ENC_IS_ISO_8859_16_CTYPE(code, ctype); @@ -185,97 +158,96 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype) } static int -iso_8859_16_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) +get_all_pair_ambig_codes(OnigAmbigType flag, const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { - { 0xa1, 0xa2 }, - { 0xa2, 0xa1 }, - { 0xa3, 0xb3 }, - { 0xa6, 0xa8 }, - { 0xa8, 0xa6 }, - { 0xaa, 0xba }, - { 0xac, 0xae }, - { 0xae, 0xac }, - { 0xaf, 0xbf }, + static const OnigPairAmbigCodes cc[] = { + { 0xa1, 0xa2 }, + { 0xa2, 0xa1 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xa8, 0xa6 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xae, 0xac }, + { 0xaf, 0xbf }, - { 0xb2, 0xb9 }, - { 0xb3, 0xa3 }, - { 0xb4, 0xb8 }, - { 0xb8, 0xb4 }, - { 0xb9, 0xb2 }, - { 0xba, 0xaa }, - { 0xbc, 0xbd }, - { 0xbd, 0xbc }, - { 0xbe, 0xff }, - { 0xbf, 0xaf }, + { 0xb2, 0xb9 }, + { 0xb3, 0xa3 }, + { 0xb4, 0xb8 }, + { 0xb8, 0xb4 }, + { 0xb9, 0xb2 }, + { 0xba, 0xaa }, + { 0xbc, 0xbd }, + { 0xbd, 0xbc }, + { 0xbe, 0xff }, + { 0xbf, 0xaf }, - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe }, + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 
0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe }, - { 0xe0, 0xc0 }, - { 0xe1, 0xc1 }, - { 0xe2, 0xc2 }, - { 0xe3, 0xc3 }, - { 0xe4, 0xc4 }, - { 0xe5, 0xc5 }, - { 0xe6, 0xc6 }, - { 0xe7, 0xc7 }, - { 0xe8, 0xc8 }, - { 0xe9, 0xc9 }, - { 0xea, 0xca }, - { 0xeb, 0xcb }, - { 0xec, 0xcc }, - { 0xed, 0xcd }, - { 0xee, 0xce }, - { 0xef, 0xcf }, + { 0xe0, 0xc0 }, + { 0xe1, 0xc1 }, + { 0xe2, 0xc2 }, + { 0xe3, 0xc3 }, + { 0xe4, 0xc4 }, + { 0xe5, 0xc5 }, + { 0xe6, 0xc6 }, + { 0xe7, 0xc7 }, + { 0xe8, 0xc8 }, + { 0xe9, 0xc9 }, + { 0xea, 0xca }, + { 0xeb, 0xcb }, + { 0xec, 0xcc }, + { 0xed, 0xcd }, + { 0xee, 0xce }, + { 0xef, 0xcf }, - { 0xf0, 0xd0 }, - { 0xf1, 0xd1 }, - { 0xf2, 0xd2 }, - { 0xf3, 0xd3 }, - { 0xf4, 0xd4 }, - { 0xf5, 0xd5 }, - { 0xf6, 0xd6 }, - { 0xf7, 0xd7 }, - { 0xf8, 0xd8 }, - { 0xf9, 0xd9 }, - { 0xfa, 0xda }, - { 0xfb, 0xdb }, - { 0xfc, 0xdc }, - { 0xfd, 0xdd }, - { 0xfe, 0xde }, - { 0xff, 0xbe } + { 0xf0, 0xd0 }, + { 0xf1, 0xd1 }, + { 0xf2, 0xd2 }, + { 0xf3, 0xd3 }, + { 0xf4, 0xd4 }, + { 0xf5, 0xd5 }, + { 0xf6, 0xd6 }, + { 0xf7, 0xd7 }, + { 0xf8, 0xd8 }, + { 0xf9, 0xd9 }, + { 0xfa, 0xda }, + { 0xfb, 0xdb }, + { 0xfc, 0xdc }, + { 0xfd, 0xdd }, + { 0xfe, 0xde }, + { 0xff, 0xbe } }; if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { @@ -296,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_16 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' 
*/ @@ -310,11 +281,11 @@ OnigEncodingType OnigEncodingISO_8859_16 = { onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, onigenc_single_byte_code_to_mbc, - iso_8859_16_mbc_to_normalize, - iso_8859_16_is_mbc_ambiguous, - iso_8859_16_get_all_pair_ambig_codes, + mbc_to_normalize, + is_mbc_ambiguous, + get_all_pair_ambig_codes, onigenc_ess_tsett_get_all_comp_ambig_codes, - iso_8859_16_is_code_ctype, + is_code_ctype, onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match diff --git a/ext/mbstring/oniguruma/enc/iso8859_2.c b/ext/mbstring/oniguruma/enc/iso8859_2.c index e86415b9c9..f8cb3756f2 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_2.c +++ b/ext/mbstring/oniguruma/enc/iso8859_2.c @@ -2,7 +2,7 @@ iso8859_2.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \ ((EncISO_8859_2_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_2_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_2_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_2_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_2_CtypeTable[256] = { +static const unsigned short EncISO_8859_2_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_2_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -177,9 +151,9 @@ iso_8859_2_is_mbc_ambiguous(OnigAmbigType flag, static int iso_8859_2_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, @@ -294,8 +268,7 @@ OnigEncodingType OnigEncodingISO_8859_2 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint 
)ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_3.c b/ext/mbstring/oniguruma/enc/iso8859_3.c index 76d2bec8a8..e62d20de7b 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_3.c +++ b/ext/mbstring/oniguruma/enc/iso8859_3.c @@ -2,7 +2,7 @@ iso8859_3.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_3_CTYPE(code,ctype) \ ((EncISO_8859_3_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_3_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_3_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_3_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_3_CtypeTable[256] = { +static const unsigned short EncISO_8859_3_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 
0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_3_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_3_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -186,9 +160,9 @@ iso_8859_3_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int 
iso_8859_3_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa6, 0xb6 }, { 0xa9, 0xb9 }, @@ -283,8 +257,7 @@ OnigEncodingType OnigEncodingISO_8859_3 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_4.c b/ext/mbstring/oniguruma/enc/iso8859_4.c index 7569006725..dd6bd7dfe3 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_4.c +++ b/ext/mbstring/oniguruma/enc/iso8859_4.c @@ -2,7 +2,7 @@ iso8859_4.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_4_CTYPE(code,ctype) \ ((EncISO_8859_4_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_4_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_4_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_4_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_4_CtypeTable[256] = { +static const unsigned short EncISO_8859_4_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_4_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_4_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -186,9 +160,9 @@ iso_8859_4_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_4_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xb1 }, { 0xa3, 0xb3 }, { 0xa5, 0xb5 }, @@ -292,8 +266,7 @@ OnigEncodingType OnigEncodingISO_8859_4 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , 
(OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/iso8859_5.c b/ext/mbstring/oniguruma/enc/iso8859_5.c index 2f7677b3e7..87b7fb8a29 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_5.c +++ b/ext/mbstring/oniguruma/enc/iso8859_5.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_5_CTYPE(code,ctype) \ ((EncISO_8859_5_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_5_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_5_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_5_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_5_CtypeTable[256] = { +static const unsigned short EncISO_8859_5_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 
0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -152,9 +152,9 @@ iso_8859_5_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_5_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xa1, 0xf1 }, { 0xa2, 0xf2 }, { 0xa3, 0xf3 }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_6.c b/ext/mbstring/oniguruma/enc/iso8859_6.c index 0fcb9e8b83..fffcd0e7d1 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_6.c +++ b/ext/mbstring/oniguruma/enc/iso8859_6.c @@ -32,23 +32,23 @@ #define ENC_IS_ISO_8859_6_CTYPE(code,ctype) \ ((EncISO_8859_6_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_6_CtypeTable[256] = { +static const unsigned short EncISO_8859_6_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 
0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git a/ext/mbstring/oniguruma/enc/iso8859_7.c b/ext/mbstring/oniguruma/enc/iso8859_7.c index 8b2cb9ec59..e87661d84b 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_7.c +++ b/ext/mbstring/oniguruma/enc/iso8859_7.c @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_7_CTYPE(code,ctype) \ ((EncISO_8859_7_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_7_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_7_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_7_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_7_CtypeTable[256] = { +static const unsigned short EncISO_8859_7_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 
0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -159,9 +159,9 @@ iso_8859_7_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_7_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xb6, 0xdc }, { 0xb8, 0xdd }, { 0xb9, 0xde }, diff --git a/ext/mbstring/oniguruma/enc/iso8859_8.c b/ext/mbstring/oniguruma/enc/iso8859_8.c index 3c95b9b137..e76966c667 
100644 --- a/ext/mbstring/oniguruma/enc/iso8859_8.c +++ b/ext/mbstring/oniguruma/enc/iso8859_8.c @@ -32,23 +32,23 @@ #define ENC_IS_ISO_8859_8_CTYPE(code,ctype) \ ((EncISO_8859_8_CtypeTable[code] & ctype) != 0) -static unsigned short EncISO_8859_8_CtypeTable[256] = { +static const unsigned short EncISO_8859_8_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, diff --git 
a/ext/mbstring/oniguruma/enc/iso8859_9.c b/ext/mbstring/oniguruma/enc/iso8859_9.c index 1b061ff6ea..16a30c5f24 100644 --- a/ext/mbstring/oniguruma/enc/iso8859_9.c +++ b/ext/mbstring/oniguruma/enc/iso8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_ISO_8859_9_CTYPE(code,ctype) \ ((EncISO_8859_9_CtypeTable[code] & ctype) != 0) -static UChar EncISO_8859_9_ToLowerCaseTable[256] = { +static const UChar EncISO_8859_9_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncISO_8859_9_ToLowerCaseTable[256] = { '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377' }; -static unsigned short EncISO_8859_9_CtypeTable[256] = { +static const unsigned short EncISO_8859_9_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 
0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -109,16 +109,6 @@ iso_8859_9_mbc_to_normalize(OnigAmbigType flag, { const UChar* p = *pp; - if (end > p + 1 && (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - *lower = 0xdf; - (*pp) += 2; - return 1; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -138,22 +128,6 @@ iso_8859_9_is_mbc_ambiguous(OnigAmbigType flag, { const UChar* p = *pp; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 1) { - if ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S'))) { - (*pp) += 2; - return TRUE; - } - } - - if (*p == 0xdf) { - (*pp)++; - return TRUE; - } - } - (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -186,9 +160,9 @@ iso_8859_9_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int iso_8859_9_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const 
OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, @@ -272,8 +246,7 @@ OnigEncodingType OnigEncodingISO_8859_9 = { 1, /* max enc length */ 1, /* min enc length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/koi8.c b/ext/mbstring/oniguruma/enc/koi8.c index f8a5a1da61..d7277e862e 100644 --- a/ext/mbstring/oniguruma/enc/koi8.c +++ b/ext/mbstring/oniguruma/enc/koi8.c @@ -33,7 +33,7 @@ #define ENC_IS_KOI8_CTYPE(code,ctype) \ ((EncKOI8_CtypeTable[code] & ctype) != 0) -static UChar EncKOI8_ToLowerCaseTable[256] = { +static const UChar EncKOI8_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncKOI8_ToLowerCaseTable[256] = { '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' }; -static unsigned short EncKOI8_CtypeTable[256] = { +static const unsigned short EncKOI8_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 
0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, 0x0008, @@ -105,9 +105,9 @@ static unsigned short EncKOI8_CtypeTable[256] = { static int koi8_mbc_to_normalize(OnigAmbigType flag, - const UChar** pp, const UChar* end, UChar* lower) + const OnigUChar** pp, const OnigUChar* end, OnigUChar* lower) { - UChar* p = (UChar *)*pp; + const OnigUChar* p = *pp; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -123,9 +123,9 @@ koi8_mbc_to_normalize(OnigAmbigType flag, } static int -koi8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) +koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end) { - UChar* p = (UChar *)*pp; + const OnigUChar* p = *pp; (*pp)++; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && @@ -151,9 +151,9 @@ koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int koi8_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const 
OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/koi8_r.c b/ext/mbstring/oniguruma/enc/koi8_r.c index 7c626df616..1010f5ff93 100644 --- a/ext/mbstring/oniguruma/enc/koi8_r.c +++ b/ext/mbstring/oniguruma/enc/koi8_r.c @@ -2,7 +2,7 @@ koi8_r.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #define ENC_IS_KOI8_R_CTYPE(code,ctype) \ ((EncKOI8_R_CtypeTable[code] & ctype) != 0) -static UChar EncKOI8_R_ToLowerCaseTable[256] = { +static const UChar EncKOI8_R_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -68,23 +68,23 @@ static UChar EncKOI8_R_ToLowerCaseTable[256] = { '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337' }; -static unsigned short EncKOI8_R_CtypeTable[256] = { +static const unsigned short EncKOI8_R_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, - 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, - 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x2284, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, + 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, 0x38b0, - 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x20a0, 0x20a0, 0x20a0, 0x21a0, + 0x38b0, 0x38b0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x3ca2, 0x3ca2, 0x3ca2, 0x3ca2, 
0x3ca2, 0x3ca2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, 0x34a2, - 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x20a0, 0x31a0, - 0x20a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, + 0x34a2, 0x34a2, 0x34a2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x31a0, + 0x21a0, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x38e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, - 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x20a0, 0x21a0, 0x20a0, 0x2008, + 0x30e2, 0x30e2, 0x30e2, 0x21a0, 0x21a0, 0x21a0, 0x21a0, 0x2008, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, 0x00a0, @@ -150,9 +150,12 @@ koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype) static int koi8_r_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { + { 0xa3, 0xb3 }, + { 0xb3, 0xa3 }, + { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, diff --git a/ext/mbstring/oniguruma/enc/mktable.c b/ext/mbstring/oniguruma/enc/mktable.c index 6b9ef4c5b5..fcf057423c 100644 --- a/ext/mbstring/oniguruma/enc/mktable.c +++ b/ext/mbstring/oniguruma/enc/mktable.c @@ -2,7 +2,7 @@ mktable.c **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ * SUCH DAMAGE. 
*/ +#include #include #define NOT_RUBY @@ -614,15 +615,10 @@ static int IsPunct(int enc, int c) if (c >= 0x3c && c <= 0x3e) return 1; } - if (c >= 0x21 && c <= 0x23) return 1; - if (c >= 0x25 && c <= 0x2a) return 1; - if (c >= 0x2c && c <= 0x2f) return 1; - if (c >= 0x3a && c <= 0x3b) return 1; - if (c >= 0x3f && c <= 0x40) return 1; - if (c >= 0x5b && c <= 0x5d) return 1; - if (c == 0x5f) return 1; - if (c == 0x7b) return 1; - if (c == 0x7d) return 1; + if (c >= 0x21 && c <= 0x2f) return 1; + if (c >= 0x3a && c <= 0x40) return 1; + if (c >= 0x5b && c <= 0x60) return 1; + if (c >= 0x7b && c <= 0x7e) return 1; switch (enc) { case ISO_8859_1: diff --git a/ext/mbstring/oniguruma/enc/sjis.c b/ext/mbstring/oniguruma/enc/sjis.c index e13407bccf..f7d7d52265 100644 --- a/ext/mbstring/oniguruma/enc/sjis.c +++ b/ext/mbstring/oniguruma/enc/sjis.c @@ -29,7 +29,7 @@ #include "regenc.h" -static int EncLen_SJIS[] = { +static const int EncLen_SJIS[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -76,7 +76,7 @@ sjis_mbc_enc_len(const UChar* p) return EncLen_SJIS[*p]; } -extern int +static int sjis_code_to_mbclen(OnigCodePoint code) { if (code < 256) { @@ -167,21 +167,16 @@ sjis_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) static int sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else { + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (sjis_code_to_mbclen(code) > 1 ? 
TRUE : FALSE); } - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } static UChar* diff --git a/ext/mbstring/oniguruma/enc/unicode.c b/ext/mbstring/oniguruma/enc/unicode.c index e3be9450a5..a8cf539014 100644 --- a/ext/mbstring/oniguruma/enc/unicode.c +++ b/ext/mbstring/oniguruma/enc/unicode.c @@ -30,7 +30,7 @@ #include "regenc.h" -unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { +const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -65,7 +65,7 @@ unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2, 0x10e2 }; -static OnigCodePoint CRAlnum[] = { +static const OnigCodePoint CRAlnum[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 414, #else @@ -490,7 +490,7 @@ static OnigCodePoint CRAlnum[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRAlnum */ -static OnigCodePoint CRAlpha[] = { +static const OnigCodePoint CRAlpha[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 396, #else @@ -897,7 +897,7 @@ static OnigCodePoint CRAlpha[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRAlpha */ -static OnigCodePoint CRBlank[] = { +static const OnigCodePoint CRBlank[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 9, #else @@ -917,7 +917,7 @@ static OnigCodePoint CRBlank[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRBlank */ -static OnigCodePoint CRCntrl[] = { +static const OnigCodePoint CRCntrl[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 19, #else @@ -947,7 +947,7 @@ static OnigCodePoint CRCntrl[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRCntrl */ -static OnigCodePoint CRDigit[] = { +static const OnigCodePoint CRDigit[] = { #ifdef 
USE_UNICODE_FULL_RANGE_CTYPE 23, #else @@ -981,7 +981,7 @@ static OnigCodePoint CRDigit[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRDigit */ -static OnigCodePoint CRGraph[] = { +static const OnigCodePoint CRGraph[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 405, #else @@ -1397,7 +1397,7 @@ static OnigCodePoint CRGraph[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRGraph */ -static OnigCodePoint CRLower[] = { +static const OnigCodePoint CRLower[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 424, #else @@ -1832,7 +1832,7 @@ static OnigCodePoint CRLower[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRLower */ -static OnigCodePoint CRPrint[] = { +static const OnigCodePoint CRPrint[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 405, #else @@ -2248,7 +2248,7 @@ static OnigCodePoint CRPrint[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRPrint */ -static OnigCodePoint CRPunct[] = { +static const OnigCodePoint CRPunct[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 86, #else @@ -2345,7 +2345,7 @@ static OnigCodePoint CRPunct[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRPunct */ -static OnigCodePoint CRSpace[] = { +static const OnigCodePoint CRSpace[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 11, #else @@ -2367,7 +2367,7 @@ static OnigCodePoint CRSpace[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRSpace */ -static OnigCodePoint CRUpper[] = { +static const OnigCodePoint CRUpper[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 421, #else @@ -2799,7 +2799,7 @@ static OnigCodePoint CRUpper[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of CRUpper */ -static OnigCodePoint CRXDigit[] = { +static const OnigCodePoint CRXDigit[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 3, #else @@ -2810,7 +2810,7 @@ static OnigCodePoint CRXDigit[] = { 0x0061, 0x0066 }; -static OnigCodePoint CRASCII[] = { +static const OnigCodePoint CRASCII[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 1, #else @@ -2819,7 +2819,7 @@ static 
OnigCodePoint CRASCII[] = { 0x0000, 0x007f }; -static OnigCodePoint CRWord[] = { +static const OnigCodePoint CRWord[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 436, #else @@ -3320,6 +3320,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) case ONIGENC_CTYPE_ALNUM: return onig_is_in_code_range((UChar* )CRAlnum, code); break; + case ONIGENC_CTYPE_NEWLINE: + return FALSE; + break; default: return ONIGENCERR_TYPE_BUG; @@ -3337,9 +3340,9 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) extern int onigenc_unicode_get_ctype_code_range(int ctype, - OnigCodePoint* sbr[], OnigCodePoint* mbr[]) + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) { - static OnigCodePoint EmptyRange[] = { 0 }; + static const OnigCodePoint EmptyRange[] = { 0 }; #define CR_SET(list) do { \ *mbr = list; \ diff --git a/ext/mbstring/oniguruma/enc/utf16_be.c b/ext/mbstring/oniguruma/enc/utf16_be.c index ad33ddbeeb..6ab80a6c1c 100755 --- a/ext/mbstring/oniguruma/enc/utf16_be.c +++ b/ext/mbstring/oniguruma/enc/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ #define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) #define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) -static int EncLen_UTF16[] = { +static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -63,6 +63,12 @@ utf16be_is_mbc_newline(const UChar* p, const UChar* end) if (p + 1 < end) { if (*(p+1) == 0x0a && *p == 0x00) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*(p+1) == 0x0d || *(p+1) == 0x85) && *p == 0x00) + return 1; + if (*p == 0x20 && (*(p+1) == 0x29 || *(p+1) == 0x28)) + return 1; +#endif } return 0; } @@ -120,18 +126,6 @@ utf16be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, if (*p == 0) { p++; - if (end > p + 2 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+1) == 0) { - *lower++ = '\0'; - *lower = 0xdf; - (*pp) += 4; - return 2; - } - *lower++ = '\0'; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -171,20 +165,6 @@ utf16be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) int c, v; p++; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 2 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+1) == 0) { - (*pp) += 2; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -228,8 +208,7 @@ OnigEncodingType OnigEncodingUTF16_BE = { 4, /* max byte length */ 2, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - 
ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf16_le.c b/ext/mbstring/oniguruma/enc/utf16_le.c index db892dcd14..2248e4910f 100755 --- a/ext/mbstring/oniguruma/enc/utf16_le.c +++ b/ext/mbstring/oniguruma/enc/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ #define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb) #define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf) -static int EncLen_UTF16[] = { +static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -69,6 +69,12 @@ utf16le_is_mbc_newline(const UChar* p, const UChar* end) if (p + 1 < end) { if (*p == 0x0a && *(p+1) == 0x00) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00) + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28)) + return 1; +#endif } return 0; } @@ -122,18 +128,6 @@ utf16le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, const UChar* p = *pp; if (*(p+1) == 0) { - if (end > p + 3 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+3) == 0) { - *lower++ = 0xdf; - *lower = '\0'; - (*pp) += 4; - return 2; - } - *(lower+1) = '\0'; if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || @@ -170,17 +164,6 @@ 
utf16le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) if (*(p+1) == 0) { int c, v; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 3 && - ((*p == 's' && *(p+2) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+2) == 'S'))) && - *(p+3) == 0) { - (*pp) += 2; - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -223,8 +206,7 @@ OnigEncodingType OnigEncodingUTF16_LE = { 4, /* max byte length */ 2, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf32_be.c b/ext/mbstring/oniguruma/enc/utf32_be.c index 60feb040b8..75133ca262 100755 --- a/ext/mbstring/oniguruma/enc/utf32_be.c +++ b/ext/mbstring/oniguruma/enc/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -41,6 +41,14 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end) if (p + 3 < end) { if (*(p+3) == 0x0a && *(p+2) == 0 && *(p+1) == 0 && *p == 0) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*(p+3) == 0x0d || *(p+3) == 0x85) + && *(p+2) == 0 && *(p+1) == 0 && *p == 0x00) + return 1; + if (*(p+2) == 0x20 && (*(p+3) == 0x29 || *(p+3) == 0x28) + && *(p+1) == 0 && *p == 0) + return 1; +#endif } return 0; } @@ -77,20 +85,6 @@ utf32be_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) { p += 3; - if (end > p + 4 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) { - *lower++ = '\0'; - *lower++ = '\0'; - *lower++ = '\0'; - *lower = 0xdf; - (*pp) += 8; - return 4; - } - *lower++ = '\0'; *lower++ = '\0'; *lower++ = '\0'; @@ -131,20 +125,6 @@ utf32be_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) int c, v; p += 3; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 4 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+3) == 0 && *(p+2) == 0 && *(p+1) == 0) { - (*pp) += 4; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -183,8 +163,7 @@ OnigEncodingType OnigEncodingUTF32_BE = { 4, /* max byte length */ 4, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR 
/* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf32_le.c b/ext/mbstring/oniguruma/enc/utf32_le.c index bba9689f76..21dca10c11 100755 --- a/ext/mbstring/oniguruma/enc/utf32_le.c +++ b/ext/mbstring/oniguruma/enc/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,6 +41,14 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end) if (p + 3 < end) { if (*p == 0x0a && *(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) return 1; +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if ((*p == 0x0d || *p == 0x85) && *(p+1) == 0x00 + && *(p+2) == 0x00 && *(p+3) == 0x00) /* U+000D, U+0085 */ + return 1; + if (*(p+1) == 0x20 && (*p == 0x29 || *p == 0x28) + && *(p+2) == 0x00 && *(p+3) == 0x00) /* U+2028, U+2029 */ + return 1; +#endif } return 0; } @@ -76,20 +84,6 @@ utf32le_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, const UChar* p = *pp; if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { - if (end > p + 7 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) == 'S'))) && - *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) { - *lower++ = 0xdf; - *lower++ = '\0'; - *lower++ = '\0'; - *lower = '\0'; - (*pp) += 8; - return 4; - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -129,20 +123,6 @@ utf32le_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) { int c, v; - if ((flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - if (end > p + 7 && - ((*p == 's' && *(p+4) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+4) ==
'S'))) && - *(p+5) == 0 && *(p+6) == 0 && *(p+7) == 0) { - (*pp) += 4; - return TRUE; - } - else if (*p == 0xdf) { - return TRUE; - } - } - if (((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && ONIGENC_IS_MBC_ASCII(p)) || ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0 && @@ -181,8 +161,7 @@ OnigEncodingType OnigEncodingUTF32_LE = { 4, /* max byte length */ 4, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ diff --git a/ext/mbstring/oniguruma/enc/utf8.c b/ext/mbstring/oniguruma/enc/utf8.c index 592bebfe8f..c7481d7050 100644 --- a/ext/mbstring/oniguruma/enc/utf8.c +++ b/ext/mbstring/oniguruma/enc/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ #define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80) -static int EncLen_UTF8[] = { +static const int EncLen_UTF8[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -65,6 +65,29 @@ utf8_mbc_enc_len(const UChar* p) return EncLen_UTF8[*p]; } +static int +utf8_is_mbc_newline(const UChar* p, const UChar* end) +{ + if (p < end) { + if (*p == 0x0a) return 1; + +#ifdef USE_UNICODE_ALL_LINE_TERMINATORS + if (*p == 0x0d) return 1; + if (p + 1 < end) { + if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */ + return 1; + if (p + 2 < end) { + if ((*(p+2) == 0xa8 || *(p+2) == 0xa9) + && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */ + return 1; + } + } +#endif + } + + return 0; +} + static OnigCodePoint utf8_mbc_to_code(const UChar* p, const UChar* end) { @@ -200,17 +223,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if (end > p + 1 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S')))) { - *lower++ = '\303'; - *lower = '\237'; - (*pp) += 2; - return 2; - } - if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p); } @@ -235,15 +247,6 @@ utf8_mbc_to_normalize(OnigAmbigType flag, const UChar** pp, const UChar* end, UC return 2; } } -#if 0 - else if (c == (UChar )'\237' && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - *lower++ = '\303'; - *lower = '\237'; - (*pp) += 2; - return 2; - } -#endif } } @@ -265,15 +268,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) const UChar* p = *pp; if (ONIGENC_IS_MBC_ASCII(p)) { - if (end > p + 1 && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0 && - ((*p == 
's' && *(p+1) == 's') || - ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0 && - (*p == 'S' && *(p+1) == 'S')))) { - (*pp) += 2; - return TRUE; - } - (*pp)++; if ((flag & ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) != 0) { return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); @@ -295,10 +289,6 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) return TRUE; } } - else if (c == (UChar )'\237' && - (flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - return TRUE; - } } } } @@ -307,16 +297,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, const UChar** pp, const UChar* end) } -static OnigCodePoint EmptyRange[] = { 0 }; +static const OnigCodePoint EmptyRange[] = { 0 }; -static OnigCodePoint SBAlnum[] = { +static const OnigCodePoint SBAlnum[] = { 3, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a }; -static OnigCodePoint MBAlnum[] = { +static const OnigCodePoint MBAlnum[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 411, #else @@ -738,13 +728,13 @@ static OnigCodePoint MBAlnum[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBAlnum */ -static OnigCodePoint SBAlpha[] = { +static const OnigCodePoint SBAlpha[] = { 2, 0x0041, 0x005a, 0x0061, 0x007a }; -static OnigCodePoint MBAlpha[] = { +static const OnigCodePoint MBAlpha[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 394, #else @@ -1149,13 +1139,13 @@ static OnigCodePoint MBAlpha[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBAlpha */ -static OnigCodePoint SBBlank[] = { +static const OnigCodePoint SBBlank[] = { 2, 0x0009, 0x0009, 0x0020, 0x0020 }; -static OnigCodePoint MBBlank[] = { +static const OnigCodePoint MBBlank[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 7, #else @@ -1173,13 +1163,13 @@ static OnigCodePoint MBBlank[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBBlank */ -static OnigCodePoint SBCntrl[] = { +static const OnigCodePoint SBCntrl[] = { 2, 0x0000, 0x001f, 0x007f, 0x007f }; -static OnigCodePoint MBCntrl[] = { +static const OnigCodePoint MBCntrl[] = { #ifdef 
USE_UNICODE_FULL_RANGE_CTYPE 18, #else @@ -1208,12 +1198,12 @@ static OnigCodePoint MBCntrl[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBCntrl */ -static OnigCodePoint SBDigit[] = { +static const OnigCodePoint SBDigit[] = { 1, 0x0030, 0x0039 }; -static OnigCodePoint MBDigit[] = { +static const OnigCodePoint MBDigit[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 22, #else @@ -1245,12 +1235,12 @@ static OnigCodePoint MBDigit[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBDigit */ -static OnigCodePoint SBGraph[] = { +static const OnigCodePoint SBGraph[] = { 1, 0x0021, 0x007e }; -static OnigCodePoint MBGraph[] = { +static const OnigCodePoint MBGraph[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 404, #else @@ -1665,12 +1655,12 @@ static OnigCodePoint MBGraph[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBGraph */ -static OnigCodePoint SBLower[] = { +static const OnigCodePoint SBLower[] = { 1, 0x0061, 0x007a }; -static OnigCodePoint MBLower[] = { +static const OnigCodePoint MBLower[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 423, #else @@ -2104,13 +2094,13 @@ static OnigCodePoint MBLower[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBLower */ -static OnigCodePoint SBPrint[] = { +static const OnigCodePoint SBPrint[] = { 2, 0x0009, 0x000d, 0x0020, 0x007e }; -static OnigCodePoint MBPrint[] = { +static const OnigCodePoint MBPrint[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 403, #else @@ -2524,7 +2514,7 @@ static OnigCodePoint MBPrint[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBPrint */ -static OnigCodePoint SBPunct[] = { +static const OnigCodePoint SBPunct[] = { 9, 0x0021, 0x0023, 0x0025, 0x002a, @@ -2537,7 +2527,7 @@ static OnigCodePoint SBPunct[] = { 0x007d, 0x007d }; /* end of SBPunct */ -static OnigCodePoint MBPunct[] = { +static const OnigCodePoint MBPunct[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 77, #else @@ -2625,13 +2615,13 @@ static OnigCodePoint MBPunct[] = { #endif /* 
USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBPunct */ -static OnigCodePoint SBSpace[] = { +static const OnigCodePoint SBSpace[] = { 2, 0x0009, 0x000d, 0x0020, 0x0020 }; -static OnigCodePoint MBSpace[] = { +static const OnigCodePoint MBSpace[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 9, #else @@ -2651,12 +2641,12 @@ static OnigCodePoint MBSpace[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBSpace */ -static OnigCodePoint SBUpper[] = { +static const OnigCodePoint SBUpper[] = { 1, 0x0041, 0x005a }; -static OnigCodePoint MBUpper[] = { +static const OnigCodePoint MBUpper[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 420, #else @@ -3087,19 +3077,19 @@ static OnigCodePoint MBUpper[] = { #endif /* USE_UNICODE_FULL_RANGE_CTYPE */ }; /* end of MBUpper */ -static OnigCodePoint SBXDigit[] = { +static const OnigCodePoint SBXDigit[] = { 3, 0x0030, 0x0039, 0x0041, 0x0046, 0x0061, 0x0066 }; -static OnigCodePoint SBASCII[] = { +static const OnigCodePoint SBASCII[] = { 1, 0x0000, 0x007f }; -static OnigCodePoint SBWord[] = { +static const OnigCodePoint SBWord[] = { 4, 0x0030, 0x0039, 0x0041, 0x005a, @@ -3107,7 +3097,7 @@ static OnigCodePoint SBWord[] = { 0x0061, 0x007a }; -static OnigCodePoint MBWord[] = { +static const OnigCodePoint MBWord[] = { #ifdef USE_UNICODE_FULL_RANGE_CTYPE 432, #else @@ -3554,7 +3544,7 @@ static OnigCodePoint MBWord[] = { static int utf8_get_ctype_code_range(int ctype, - OnigCodePoint* sbr[], OnigCodePoint* mbr[]) + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) { #define CR_SET(sbl,mbl) do { \ *sbr = sbl; \ @@ -3622,7 +3612,7 @@ static int utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) { #ifdef USE_UNICODE_FULL_RANGE_CTYPE - OnigCodePoint *range; + const OnigCodePoint *range; #endif if (code < 256) { @@ -3674,6 +3664,9 @@ utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype) case ONIGENC_CTYPE_ALNUM: range = MBAlnum; break; + case ONIGENC_CTYPE_NEWLINE: + return FALSE; + break; default: return ONIGENCERR_TYPE_BUG; @@ 
-3713,8 +3706,7 @@ OnigEncodingType OnigEncodingUTF8 = { 6, /* max byte length */ 1, /* min byte length */ (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | - ONIGENC_AMBIGUOUS_MATCH_COMPOUND), + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ), { (OnigCodePoint )'\\' /* esc */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */ @@ -3723,7 +3715,7 @@ OnigEncodingType OnigEncodingUTF8 = { , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */ , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */ }, - onigenc_is_mbc_newline_0x0a, + utf8_is_mbc_newline, utf8_mbc_to_code, utf8_code_to_mbclen, utf8_code_to_mbc, diff --git a/ext/mbstring/oniguruma/index.html b/ext/mbstring/oniguruma/index.html index 02e844c363..d55f1cc94f 100755 --- a/ext/mbstring/oniguruma/index.html +++ b/ext/mbstring/oniguruma/index.html @@ -5,58 +5,50 @@ - -M -N - -

Oniguruma

+

Oniguruma

(Japanese)

-2005/03/07 (C) K.Kosako +(c) K.Kosako, updated at: 2007/08/16

+
+ +
What's new + +
    +
  • 2007/08/16: Version 4.7.1 released.
  • +
  • 2007/07/14: Version 5.9.0 released.
  • +
  • 2007/06/20: Version 2.5.9 released.
  • +
  • 2007/06/20: Maintainer of 2.x was changed.
  • +
+
+
+

Oniguruma is a regular expressions library.
The characteristics of this library is that different character encoding
for every regular expression object can be specified. +
(supported APIs: GNU regex, POSIX and Oniguruma native)

Supported character encodings:
ASCII, UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE,
EUC-JP, EUC-TW, EUC-KR, EUC-CN,
-Shift_JIS, Big5, KOI8-R, KOI8,
+Shift_JIS, Big5, GB18030, KOI8-R, CP1251,
ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5,
ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-8859-10,
-ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16 -

-
-

- -

+ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-
What's new +(GB18030 encoding was contributed by KUBO Takehiro)
+(CP1251 encoding was contributed by Byte)
-
    -
  • released Version 3.7.1 (2005/03/07) -
  • released Version 2.4.2 (2005/03/05) -
+


-
-
There are two ways of using of it in this program. -
    -
  • (1) C library (supported APIs: GNU regex, POSIX and Oniguruma native) -
  • (2) Built-in regular expressions engine of Ruby 1.6/1.8/1.9
    - In Ruby 1.9, Oniguruma is already integrated by Kazuo Saito. -
-
+
License: BSD license.
Platform: @@ -67,31 +59,27 @@ ISO-8859-11, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16
-
License:
-When this software is partly used or it is distributed with Ruby, -this of Ruby follows the license of Ruby.
-It follows the BSD license in the case of the one except for it. -

Download:
-* 3.X.X supports UTF-16/UTF-32, Ruby 1.9.X.
-* 2.X.X does not support UTF-16/UTF-32, supports Ruby 1.6/1.8. +Maintainer of 2.x was changed to Hannes Wyss <hwyss AT ywesee.com>.
+About 2.x, please contact him.
+* 5.x supports Unicode Property/Script.
+* 2.x supports Ruby1.6/1.8.

-
-
Documents: (version 3.7.1) +
Documents: (version 5.9.0)
-
Links: +
Site Links: + +
+
Links: +
References:
-

and I'm thankful to Akinori MUSHA.

- +
+
Other Libraries: + +
+
+Back to Home diff --git a/ext/mbstring/oniguruma/onigcmpt200.h b/ext/mbstring/oniguruma/onigcmpt200.h index 4c029304b6..d9b1419146 100644 --- a/ext/mbstring/oniguruma/onigcmpt200.h +++ b/ext/mbstring/oniguruma/onigcmpt200.h @@ -29,6 +29,12 @@ #define REGCODE_EUCJP REG_ENCODING_EUC_JP #define REGCODE_SJIS REG_ENCODING_SJIS +/* Don't use REGCODE_XXXX. (obsoleted) */ +#define MBCTYPE_ASCII RE_MBCTYPE_ASCII +#define MBCTYPE_EUC RE_MBCTYPE_EUC +#define MBCTYPE_SJIS RE_MBCTYPE_SJIS +#define MBCTYPE_UTF8 RE_MBCTYPE_UTF8 + typedef unsigned char* RegTransTableType; #define RegOptionType OnigOptionType #define RegDistance OnigDistance diff --git a/ext/mbstring/oniguruma/oniggnu.h b/ext/mbstring/oniguruma/oniggnu.h index b203f6c8a3..3da9f235c2 100644 --- a/ext/mbstring/oniguruma/oniggnu.h +++ b/ext/mbstring/oniguruma/oniggnu.h @@ -35,10 +35,10 @@ extern "C" { #endif -#define MBCTYPE_ASCII 0 -#define MBCTYPE_EUC 1 -#define MBCTYPE_SJIS 2 -#define MBCTYPE_UTF8 3 +#define RE_MBCTYPE_ASCII 0 +#define RE_MBCTYPE_EUC 1 +#define RE_MBCTYPE_SJIS 2 +#define RE_MBCTYPE_UTF8 3 /* GNU regex options */ #ifndef RE_NREGS diff --git a/ext/mbstring/oniguruma/oniguruma.h b/ext/mbstring/oniguruma/oniguruma.h index 0fc4ac5a11..5196a3d585 100644 --- a/ext/mbstring/oniguruma/oniguruma.h +++ b/ext/mbstring/oniguruma/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -36,7 +36,7 @@ extern "C" { #endif #define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 3 +#define ONIGURUMA_VERSION_MAJOR 4 #define ONIGURUMA_VERSION_MINOR 7 #define ONIGURUMA_VERSION_TEENY 1 @@ -49,6 +49,13 @@ extern "C" { # endif #endif +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + #ifndef P_ #if defined(__STDC__) || defined(_WIN32) # define P_(args) args @@ -99,17 +106,11 @@ ONIG_EXTERN OnigAmbigType OnigDefaultAmbigFlag; #define ONIGENC_AMBIGUOUS_MATCH_NONE 0 #define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0) #define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1) -/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */ -/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */ -/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */ #define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1) -#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30) #define ONIGENC_AMBIGUOUS_MATCH_FULL \ - ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \ - ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \ - ONIGENC_AMBIGUOUS_MATCH_COMPOUND ) + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE ) #define ONIGENC_AMBIGUOUS_MATCH_DEFAULT OnigDefaultAmbigFlag @@ -167,10 +168,10 @@ typedef struct { int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf); int (*mbc_to_normalize)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to); int (*is_mbc_ambiguous)(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end); - int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs); - int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs); + int (*get_all_pair_ambig_codes)(OnigAmbigType flag, const OnigPairAmbigCodes** acs); + int (*get_all_comp_ambig_codes)(OnigAmbigType flag, const OnigCompAmbigCodes** acs); 
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); - int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]); + int (*get_ctype_code_range)(int ctype, const OnigCodePoint* sb_range[], const OnigCodePoint* mb_range[]); OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p); int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end); } OnigEncodingType; @@ -206,6 +207,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingSJIS; ONIG_EXTERN OnigEncodingType OnigEncodingKOI8; ONIG_EXTERN OnigEncodingType OnigEncodingKOI8_R; ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; +ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #define ONIG_ENCODING_ASCII (&OnigEncodingASCII) #define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1) @@ -236,6 +238,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; #define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) #define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R) #define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5) +#define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030) #endif /* else RUBY && M17N */ @@ -418,11 +421,11 @@ OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const /* encoding API */ ONIG_EXTERN -int onigenc_init P_(()); +int onigenc_init P_((void)); ONIG_EXTERN int onigenc_set_default_encoding P_((OnigEncoding enc)); ONIG_EXTERN -OnigEncoding onigenc_get_default_encoding P_(()); +OnigEncoding onigenc_get_default_encoding P_((void)); ONIG_EXTERN void onigenc_set_default_caseconv_table P_((const OnigUChar* table)); ONIG_EXTERN @@ -448,7 +451,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); #define ONIG_NREGION 10 #define ONIG_MAX_BACKREF_NUM 1000 #define ONIG_MAX_REPEAT_NUM 100000 -#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 1000 +#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 /* constants */ #define ONIG_MAX_ERROR_MESSAGE_LEN 90 @@ -457,8 +460,8 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_DEFAULT 
ONIG_OPTION_NONE /* options */ -#define ONIG_OPTION_NONE 0 -#define ONIG_OPTION_IGNORECASE 1L +#define ONIG_OPTION_NONE 0U +#define ONIG_OPTION_IGNORECASE 1U #define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) #define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) #define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) @@ -471,6 +474,7 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) #define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) #define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1) +#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */ #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) @@ -484,6 +488,7 @@ typedef struct { OnigOptionType options; /* default option */ } OnigSyntaxType; +ONIG_EXTERN OnigSyntaxType OnigSyntaxASIS; ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixBasic; ONIG_EXTERN OnigSyntaxType OnigSyntaxPosixExtended; ONIG_EXTERN OnigSyntaxType OnigSyntaxEmacs; @@ -491,9 +496,11 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxGrep; ONIG_EXTERN OnigSyntaxType OnigSyntaxGnuRegex; ONIG_EXTERN OnigSyntaxType OnigSyntaxJava; ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl; +ONIG_EXTERN OnigSyntaxType OnigSyntaxPerl_NG; ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; /* predefined syntaxes (see regsyntax.c) */ +#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS) #define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic) #define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended) #define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs) @@ -501,6 +508,7 @@ ONIG_EXTERN OnigSyntaxType OnigSyntaxRuby; #define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex) #define ONIG_SYNTAX_JAVA (&OnigSyntaxJava) #define ONIG_SYNTAX_PERL (&OnigSyntaxPerl) +#define ONIG_SYNTAX_PERL_NG (&OnigSyntaxPerl_NG) #define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby) /* default syntax */ @@ -508,80 +516,81 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYNTAX_DEFAULT 
OnigDefaultSyntax /* syntax (operators) */ -#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1<<0) -#define ONIG_SYN_OP_DOT_ANYCHAR (1<<1) /* . */ -#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1<<2) /* * */ -#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1<<3) -#define ONIG_SYN_OP_PLUS_ONE_INF (1<<4) /* + */ -#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1<<5) -#define ONIG_SYN_OP_QMARK_ZERO_ONE (1<<6) /* ? */ -#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1<<7) -#define ONIG_SYN_OP_BRACE_INTERVAL (1<<8) /* {lower,upper} */ -#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1<<9) /* \{lower,upper\} */ -#define ONIG_SYN_OP_VBAR_ALT (1<<10) /* | */ -#define ONIG_SYN_OP_ESC_VBAR_ALT (1<<11) /* \| */ -#define ONIG_SYN_OP_LPAREN_SUBEXP (1<<12) /* (...) */ -#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1<<13) /* \(...\) */ -#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1<<14) /* \A, \Z, \z */ -#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1<<15) /* \G */ -#define ONIG_SYN_OP_DECIMAL_BACKREF (1<<16) /* \num */ -#define ONIG_SYN_OP_BRACKET_CC (1<<17) /* [...] */ -#define ONIG_SYN_OP_ESC_W_WORD (1<<18) /* \w, \W */ -#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1<<19) /* \<. \> */ -#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1<<20) /* \b, \B */ -#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1<<21) /* \s, \S */ -#define ONIG_SYN_OP_ESC_D_DIGIT (1<<22) /* \d, \D */ -#define ONIG_SYN_OP_LINE_ANCHOR (1<<23) /* ^, $ */ -#define ONIG_SYN_OP_POSIX_BRACKET (1<<24) /* [:xxxx:] */ -#define ONIG_SYN_OP_QMARK_NON_GREEDY (1<<25) /* ??,*?,+?,{n,m}? */ -#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1<<26) /* \n,\r,\t,\a ... */ -#define ONIG_SYN_OP_ESC_C_CONTROL (1<<27) /* \cx */ -#define ONIG_SYN_OP_ESC_OCTAL3 (1<<28) /* \OOO */ -#define ONIG_SYN_OP_ESC_X_HEX2 (1<<29) /* \xHH */ -#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1<<30) /* \x{7HHHHHHH} */ - -#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1<<0) /* \Q...\E */ -#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1<<1) /* (?...) 
*/ -#define ONIG_SYN_OP2_OPTION_PERL (1<<2) /* (?imsx),(?-imsx) */ -#define ONIG_SYN_OP2_OPTION_RUBY (1<<3) /* (?imx), (?-imx) */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1<<4) /* ?+,*+,++ */ -#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1<<5) /* {n,m}+ */ -#define ONIG_SYN_OP2_CCLASS_SET_OP (1<<6) /* [...&&..[..]..] */ -#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1<<7) /* (?...) */ -#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1<<8) /* \k */ -#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1<<9) /* \g, \g */ -#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1<<10) /* (?@..),(?@..) */ -#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1<<11) /* \C-x */ -#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1<<12) /* \M-x */ -#define ONIG_SYN_OP2_ESC_V_VTAB (1<<13) /* \v as VTAB */ -#define ONIG_SYN_OP2_ESC_U_HEX4 (1<<14) /* \uHHHH */ -#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1<<15) /* \`, \' */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ -#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */ -#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */ -#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */ +#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0) +#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */ +#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */ +#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3) +#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */ +#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5) +#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */ +#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7) +#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */ +#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */ +#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */ +#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */ +#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) 
*/ +#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */ +#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */ +#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */ +#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */ +#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */ +#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */ +#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */ +#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */ +#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */ +#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */ +#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */ +#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */ +#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */ +#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */ +#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */ +#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */ +#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */ +#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */ + +#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */ +#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */ +#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsx),(?-imsx) */ +#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imx), (?-imx) */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */ +#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */ +#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */ +#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?...) */ +#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k */ +#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g, \g */ +#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@..) 
*/ +#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */ +#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */ +#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */ +#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */ +#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */ +#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */ +#define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) /* \p{IsXDigit} */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ +#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ /* syntax (behavior) */ -#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */ -#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1<<0) /* ?, *, +, {n,m} */ -#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1<<1) /* error or ignore */ -#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1<<2) /* ...)... */ -#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1<<3) /* {??? */ -#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1<<4) /* {,n} => {0,n} */ -#define ONIG_SYN_STRICT_CHECK_BACKREF (1<<5) /* /(\1)/,/\1()/ ..*/ -#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ -#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */ -#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?)(?) */ -#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */ +#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ +#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */ +#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */ +#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */ +#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? 
*/ +#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */ +#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/ +#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */ +#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */ +#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?)(?) */ +#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */ /* syntax (behavior) in char class [...] */ -#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */ -#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1<<21) /* [..\w..] etc.. */ -#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1<<22) -#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1<<23) /* [0-9-a]=[0-9\-a] */ +#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */ +#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */ +#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22) +#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */ /* syntax (behavior) warning */ -#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1<<24) /* [,-,] */ -#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1<<25) /* (?:a*)+ */ +#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */ +#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */ /* meta character specifiers (onig_set_meta_char()) */ #define ONIG_META_CHAR_ESCAPE 0 @@ -660,6 +669,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 +#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403 /* errors related to thread */ #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 @@ -701,6 +711,7 @@ struct re_registers { typedef struct re_registers OnigRegion; typedef struct { + OnigEncoding enc; OnigUChar* par; OnigUChar* par_end; } OnigErrorInfo; @@ -735,6 +746,7 @@ typedef struct re_pattern_buffer { int num_mem; /* used memory(...) 
num counted from 1 */ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_comb_exp_check; /* combination explosion check */ int num_call; /* number of subexp call */ unsigned int capture_history; /* (?@...) flag (1-31) */ unsigned int bt_mem_start; /* need backtrack flag */ @@ -766,7 +778,13 @@ typedef struct re_pattern_buffer { /* regex_t link chain */ struct re_pattern_buffer* chain; /* escape compile-conflict */ -} regex_t; +} OnigRegexType; + +typedef OnigRegexType* OnigRegex; + +#ifndef ONIG_ESCAPE_REGEX_T_COLLISION + typedef OnigRegexType regex_t; +#endif typedef struct { @@ -788,19 +806,19 @@ void onig_set_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN -int onig_new P_((regex_t**, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_new_deluxe P_((regex_t** reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN -void onig_free P_((regex_t*)); +void onig_free P_((OnigRegex)); ONIG_EXTERN -int onig_recompile P_((regex_t*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); +int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_recompile_deluxe P_((regex_t* reg, const OnigUChar* pattern, const OnigUChar* 
pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_search P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); +int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN -int onig_match P_((regex_t*, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); +int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN OnigRegion* onig_region_new P_((void)); ONIG_EXTERN @@ -816,29 +834,31 @@ int onig_region_resize P_((OnigRegion* region, int n)); ONIG_EXTERN int onig_region_set P_((OnigRegion* region, int at, int beg, int end)); ONIG_EXTERN -int onig_name_to_group_numbers P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); +int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums)); ONIG_EXTERN -int onig_name_to_backref_number P_((regex_t* reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); +int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region)); ONIG_EXTERN -int onig_foreach_name P_((regex_t* reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,regex_t*,void*), void* arg)); +int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg)); ONIG_EXTERN -int onig_number_of_names P_((regex_t* reg)); +int onig_number_of_names P_((OnigRegex reg)); ONIG_EXTERN -int onig_number_of_captures P_((regex_t* 
reg)); +int onig_number_of_captures P_((OnigRegex reg)); ONIG_EXTERN -int onig_number_of_capture_histories P_((regex_t* reg)); +int onig_number_of_capture_histories P_((OnigRegex reg)); ONIG_EXTERN OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); ONIG_EXTERN int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); ONIG_EXTERN -OnigEncoding onig_get_encoding P_((regex_t* reg)); +int onig_noname_group_capture_is_active P_((OnigRegex reg)); +ONIG_EXTERN +OnigEncoding onig_get_encoding P_((OnigRegex reg)); ONIG_EXTERN -OnigOptionType onig_get_options P_((regex_t* reg)); +OnigOptionType onig_get_options P_((OnigRegex reg)); ONIG_EXTERN -OnigAmbigType onig_get_ambig_flag P_((regex_t* reg)); +OnigAmbigType onig_get_ambig_flag P_((OnigRegex reg)); ONIG_EXTERN -OnigSyntaxType* onig_get_syntax P_((regex_t* reg)); +OnigSyntaxType* onig_get_syntax P_((OnigRegex reg)); ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); ONIG_EXTERN @@ -864,7 +884,7 @@ int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint co ONIG_EXTERN void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); ONIG_EXTERN -OnigAmbigType onig_get_default_ambig_flag P_(()); +OnigAmbigType onig_get_default_ambig_flag P_((void)); ONIG_EXTERN int onig_set_default_ambig_flag P_((OnigAmbigType ambig_flag)); ONIG_EXTERN diff --git a/ext/mbstring/oniguruma/regcomp.c b/ext/mbstring/oniguruma/regcomp.c index a2315fcec5..6a0976dee2 100644 --- a/ext/mbstring/oniguruma/regcomp.c +++ b/ext/mbstring/oniguruma/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -34,7 +34,7 @@ OnigAmbigType OnigDefaultAmbigFlag = ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE); extern OnigAmbigType -onig_get_default_ambig_flag() +onig_get_default_ambig_flag(void) { return OnigDefaultAmbigFlag; } @@ -47,10 +47,6 @@ onig_set_default_ambig_flag(OnigAmbigType ambig_flag) } -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - static UChar* k_strdup(UChar* s, UChar* end) { @@ -186,6 +182,17 @@ add_opcode(regex_t* reg, int opcode) return 0; } +#ifdef USE_COMBINATION_EXPLOSION_CHECK +static int +add_state_check_num(regex_t* reg, int num) +{ + StateCheckNumType n = (StateCheckNumType )num; + + BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM); + return 0; +} +#endif + static int add_rel_addr(regex_t* reg, int addr) { @@ -528,6 +535,8 @@ add_multi_byte_cclass(BBuf* mbuf, regex_t* reg) add_length(reg, mbuf->used); return add_bytes(reg, mbuf->p, mbuf->used); #else + static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; + int r, pad_size; UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH; @@ -644,12 +653,12 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } p[id].lower = lower; - p[id].upper = upper; + p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); return 0; } static int -compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, +compile_range_repeat_node(QuantifierNode* qn, int target_len, int empty_info, regex_t* reg) { int r; @@ -673,7 +682,7 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, #ifdef USE_SUBEXP_CALL reg->num_call > 0 || #endif - IS_QUALIFIER_IN_REPEAT(qn)) { + IS_QUANTIFIER_IN_REPEAT(qn)) { r = add_opcode(reg, qn->greedy ? 
OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); } else { @@ -684,10 +693,257 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info, return r; } -#define QUALIFIER_EXPAND_LIMIT_SIZE 50 +static int +is_anychar_star_quantifier(QuantifierNode* qn) +{ + if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && + NTYPE(qn->target) == N_ANYCHAR) + return 1; + else + return 0; +} + +#define QUANTIFIER_EXPAND_LIMIT_SIZE 50 +#define CKN_ON (ckn > 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK static int -compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) +compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) +{ + int len, mod_tlen, cklen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); + + /* anychar repeat */ + if (NTYPE(qn->target) == N_ANYCHAR) { + if (qn->greedy && infinite) { + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) + return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; + else + return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) + len = SIZE_OP_JUMP; + else + len = 0; + + len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; + } + else { + if (qn->lower == 0) + len = SIZE_OP_JUMP; + else + len = 0; + + len += mod_tlen + SIZE_OP_PUSH + cklen; + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) /* /(?..){0}/ */ + len = SIZE_OP_JUMP + tlen; + else + len = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + len = SIZE_OP_STATE_CHECK_PUSH + tlen; + } + else { + len = SIZE_OP_PUSH + tlen; + } + } + else { + 
len = tlen; + } + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; + } + else { + len = SIZE_OP_REPEAT_INC + + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + if (CKN_ON) + len += SIZE_OP_STATE_CHECK; + } + + return len; +} + +static int +compile_quantifier_node(QuantifierNode* qn, regex_t* reg) +{ + int r, mod_tlen; + int ckn; + int infinite = IS_REPEAT_INFINITE(qn->upper); + int empty_info = qn->target_empty_info; + int tlen = compile_length_tree(qn->target, reg); + + if (tlen < 0) return tlen; + + ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); + + if (is_anychar_star_quantifier(qn)) { + r = compile_tree_n_times(qn->target, qn->lower, reg); + if (r) return r; + if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); + else + r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); + if (r) return r; + if (CKN_ON) { + r = add_state_check_num(reg, ckn); + if (r) return r; + } + + return add_bytes(reg, NSTRING(qn->next_head_exact).s, 1); + } + else { + if (IS_MULTILINE(reg->options)) { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_ML_STAR + : OP_ANYCHAR_ML_STAR)); + } + else { + r = add_opcode(reg, (CKN_ON ? + OP_STATE_CHECK_ANYCHAR_STAR + : OP_ANYCHAR_STAR)); + } + if (r) return r; + if (CKN_ON) + r = add_state_check_num(reg, ckn); + + return r; + } + } + + if (empty_info != 0) + mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); + else + mod_tlen = tlen; + + if (infinite && qn->lower <= 1) { + if (qn->greedy) { + if (qn->lower == 1) { + r = add_opcode_rel_addr(reg, OP_JUMP, + (CKN_ON ? 
SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); + if (r) return r; + } + + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); + } + if (r) return r; + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, + -(mod_tlen + (int )SIZE_OP_JUMP + + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); + } + else { + if (qn->lower == 0) { + r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); + if (r) return r; + } + r = compile_tree_empty_check(qn->target, reg, empty_info); + if (r) return r; + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, + -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); + } + else + r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); + } + } + else if (qn->upper == 0) { + if (qn->is_refered != 0) { /* /(?..){0}/ */ + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else + r = 0; + } + else if (qn->upper == 1 && qn->greedy) { + if (qn->lower == 0) { + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, tlen); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, tlen); + } + if (r) return r; + } + + r = compile_tree(qn->target, reg); + } + else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' 
*/ + if (CKN_ON) { + r = add_opcode(reg, OP_STATE_CHECK_PUSH); + if (r) return r; + r = add_state_check_num(reg, ckn); + if (r) return r; + r = add_rel_addr(reg, SIZE_OP_JUMP); + } + else { + r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); + } + + if (r) return r; + r = add_opcode_rel_addr(reg, OP_JUMP, tlen); + if (r) return r; + r = compile_tree(qn->target, reg); + } + else { + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + if (CKN_ON) { + if (r) return r; + r = add_opcode(reg, OP_STATE_CHECK); + if (r) return r; + r = add_state_check_num(reg, ckn); + } + } + return r; +} + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +static int +compile_length_quantifier_node(QuantifierNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); @@ -712,8 +968,8 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) mod_tlen = tlen; if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = SIZE_OP_JUMP; } else { @@ -736,7 +992,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { len = tlen * qn->lower; len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); } @@ -752,17 +1008,7 @@ compile_length_qualifier_node(QualifierNode* qn, regex_t* reg) } static int -is_anychar_star_qualifier(QualifierNode* qn) -{ - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == N_ANYCHAR) - return 1; - else - return 0; -} - -static int -compile_qualifier_node(QualifierNode* qn, regex_t* reg) +compile_quantifier_node(QuantifierNode* qn, regex_t* reg) { int i, r, mod_tlen; int infinite = 
IS_REPEAT_INFINITE(qn->upper); @@ -771,7 +1017,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) if (tlen < 0) return tlen; - if (is_anychar_star_qualifier(qn)) { + if (is_anychar_star_quantifier(qn)) { r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) return r; if (IS_NOT_NULL(qn->next_head_exact)) { @@ -796,8 +1042,8 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) mod_tlen = tlen; if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUALIFIER_EXPAND_LIMIT_SIZE)) { - if (qn->lower == 1 && tlen > QUALIFIER_EXPAND_LIMIT_SIZE) { + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1); @@ -861,7 +1107,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUALIFIER_EXPAND_LIMIT_SIZE)) { + <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn->upper - qn->lower; r = compile_tree_n_times(qn->target, qn->lower, reg); @@ -887,6 +1133,7 @@ compile_qualifier_node(QualifierNode* qn, regex_t* reg) } return r; } +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int compile_length_option_node(EffectNode* node, regex_t* reg) @@ -978,7 +1225,7 @@ compile_length_effect_node(EffectNode* node, regex_t* reg) case EFFECT_STOP_BACKTRACK: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QualifierNode* qn = &NQUALIFIER(node->target); + QuantifierNode* qn = &NQUANTIFIER(node->target); tlen = compile_length_tree(qn->target, reg); if (tlen < 0) return tlen; @@ -1068,7 +1315,7 @@ compile_effect_node(EffectNode* node, regex_t* reg) case EFFECT_STOP_BACKTRACK: if (IS_EFFECT_STOP_BT_SIMPLE_REPEAT(node)) { - QualifierNode* qn = &NQUALIFIER(node->target); + QuantifierNode* qn = &NQUANTIFIER(node->target); r = compile_tree_n_times(qn->target, qn->lower, reg); if (r) 
return r; @@ -1268,8 +1515,15 @@ compile_length_tree(Node* node, regex_t* reg) { BackrefNode* br = &(NBACKREF(node)); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 3) + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); } else { @@ -1284,8 +1538,8 @@ compile_length_tree(Node* node, regex_t* reg) break; #endif - case N_QUALIFIER: - r = compile_length_qualifier_node(&(NQUALIFIER(node)), reg); + case N_QUANTIFIER: + r = compile_length_quantifier_node(&(NQUANTIFIER(node)), reg); break; case N_EFFECT: @@ -1381,9 +1635,21 @@ compile_tree(Node* node, regex_t* reg) case N_BACKREF: { - int i; BackrefNode* br = &(NBACKREF(node)); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + if (r) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r) return r; + r = add_length(reg, br->nest_level); + if (r) return r; + + goto add_bacref_mems; + } + else +#endif if (br->back_num == 1) { n = br->back_static[0]; if (IS_IGNORECASE(reg->options)) { @@ -1395,7 +1661,6 @@ compile_tree(Node* node, regex_t* reg) switch (n) { case 1: r = add_opcode(reg, OP_BACKREF1); break; case 2: r = add_opcode(reg, OP_BACKREF2); break; - case 3: r = add_opcode(reg, OP_BACKREF3); break; default: r = add_opcode(reg, OP_BACKREFN); if (r) return r; @@ -1405,17 +1670,21 @@ compile_tree(Node* node, regex_t* reg) } } else { + int i; int* p; if (IS_IGNORECASE(reg->options)) { - add_opcode(reg, OP_BACKREF_MULTI_IC); + r = add_opcode(reg, OP_BACKREF_MULTI_IC); } else { - add_opcode(reg, OP_BACKREF_MULTI); + r = add_opcode(reg, OP_BACKREF_MULTI); } - if (r) return r; - add_length(reg, br->back_num); + +#ifdef USE_BACKREF_AT_LEVEL + add_bacref_mems: +#endif + r = 
add_length(reg, br->back_num); if (r) return r; p = BACKREFS_P(br); for (i = br->back_num - 1; i >= 0; i--) { @@ -1432,8 +1701,8 @@ compile_tree(Node* node, regex_t* reg) break; #endif - case N_QUALIFIER: - r = compile_qualifier_node(&(NQUALIFIER(node)), reg); + case N_QUANTIFIER: + r = compile_quantifier_node(&(NQUANTIFIER(node)), reg); break; case N_EFFECT: @@ -1470,13 +1739,13 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: + case N_QUANTIFIER: { - Node** ptarget = &(NQUALIFIER(node).target); + Node** ptarget = &(NQUANTIFIER(node).target); Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == N_QUALIFIER) { - onig_reduce_nested_qualifier(node, *ptarget); + if (*ptarget != old && NTYPE(*ptarget) == N_QUANTIFIER) { + onig_reduce_nested_quantifier(node, *ptarget); } } break; @@ -1550,8 +1819,8 @@ renumber_by_map(Node* node, GroupNumRemap* map) r = renumber_by_map(NCONS(node).left, map); } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = renumber_by_map(NQUALIFIER(node).target, map); + case N_QUANTIFIER: + r = renumber_by_map(NQUANTIFIER(node).target, map); break; case N_EFFECT: r = renumber_by_map(NEFFECT(node).target, map); @@ -1580,8 +1849,8 @@ numbered_ref_check(Node* node) r = numbered_ref_check(NCONS(node).left); } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = numbered_ref_check(NQUALIFIER(node).target); + case N_QUANTIFIER: + r = numbered_ref_check(NQUANTIFIER(node).target); break; case N_EFFECT: r = numbered_ref_check(NEFFECT(node).target); @@ -1662,7 +1931,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK static int -qualifiers_memory_node_info(Node* node) +quantifiers_memory_node_info(Node* node) { int r = 0; @@ -1672,7 +1941,7 @@ 
qualifiers_memory_node_info(Node* node) { int v; do { - v = qualifiers_memory_node_info(NCONS(node).left); + v = quantifiers_memory_node_info(NCONS(node).left); if (v > r) r = v; } while (v >= 0 && IS_NOT_NULL(node = NCONS(node).right)); } @@ -1684,15 +1953,15 @@ qualifiers_memory_node_info(Node* node) return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ } else - r = qualifiers_memory_node_info(NCALL(node).target); + r = quantifiers_memory_node_info(NCALL(node).target); break; #endif - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->upper != 0) { - r = qualifiers_memory_node_info(qn->target); + r = quantifiers_memory_node_info(qn->target); } } break; @@ -1707,7 +1976,7 @@ qualifiers_memory_node_info(Node* node) case EFFECT_OPTION: case EFFECT_STOP_BACKTRACK: - r = qualifiers_memory_node_info(en->target); + r = quantifiers_memory_node_info(en->target); break; default: break; @@ -1812,9 +2081,9 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) *min = 1; break; - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->lower > 0) { r = get_min_match_length(qn->target, min, env); @@ -1933,9 +2202,9 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env) break; #endif - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->upper != 0) { r = get_max_match_length(qn->target, max, env); @@ -2040,9 +2309,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->lower == qn->upper) { r = get_char_length_tree1(qn->target, reg, &tlen, level); if (r == 0) @@ -2120,29 +2389,6 @@ get_char_length_tree(Node* node, regex_t* reg, int* len) return 
get_char_length_tree1(node, reg, len, 0); } -extern int -onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) -{ - int found; - - if (ONIGENC_MBC_MINLEN(enc) > 1 || (code >= SINGLE_BYTE_SIZE)) { - if (IS_NULL(cc->mbuf)) { - found = 0; - } - else { - found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); - } - } - else { - found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); - } - - if (IS_CCLASS_NOT(cc)) - return !found; - else - return found; -} - /* x is not included y ==> 1 : 0 */ static int is_not_included(Node* x, Node* y, regex_t* reg) @@ -2375,9 +2621,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->lower > 0) { if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; @@ -2438,8 +2684,8 @@ check_type_tree(Node* node, int type_mask, int effect_mask, int anchor_mask) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = check_type_tree(NQUALIFIER(node).target, type_mask, effect_mask, + case N_QUANTIFIER: + r = check_type_tree(NQUANTIFIER(node).target, type_mask, effect_mask, anchor_mask); break; @@ -2514,8 +2760,11 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) } break; - case N_QUALIFIER: - r = subexp_inf_recursive_check(NQUALIFIER(node).target, env, head); + case N_QUANTIFIER: + r = subexp_inf_recursive_check(NQUANTIFIER(node).target, env, head); + if (r == RECURSION_EXIST) { + if (NQUANTIFIER(node).lower == 0) r = 0; + } break; case N_ANCHOR: @@ -2570,8 +2819,8 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = subexp_inf_recursive_check_trav(NQUALIFIER(node).target, env); + case N_QUANTIFIER: + r = subexp_inf_recursive_check_trav(NQUANTIFIER(node).target, env); break; case N_ANCHOR: @@ -2625,8 +2874,8 @@ 
subexp_recursive_check(Node* node) } while (IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = subexp_recursive_check(NQUALIFIER(node).target); + case N_QUANTIFIER: + r = subexp_recursive_check(NQUANTIFIER(node).target); break; case N_ANCHOR: @@ -2690,11 +2939,11 @@ subexp_recursive_check_trav(Node* node, ScanEnv* env) } break; - case N_QUALIFIER: - r = subexp_recursive_check_trav(NQUALIFIER(node).target, env); - if (NQUALIFIER(node).upper == 0) { + case N_QUANTIFIER: + r = subexp_recursive_check_trav(NQUANTIFIER(node).target, env); + if (NQUANTIFIER(node).upper == 0) { if (r == FOUND_CALLED_NODE) - NQUALIFIER(node).is_refered = 1; + NQUANTIFIER(node).is_refered = 1; } break; @@ -2757,8 +3006,8 @@ setup_subexp_call(Node* node, ScanEnv* env) } while (r == 0 && IS_NOT_NULL(node = NCONS(node).right)); break; - case N_QUALIFIER: - r = setup_subexp_call(NQUALIFIER(node).target, env); + case N_QUANTIFIER: + r = setup_subexp_call(NQUANTIFIER(node).target, env); break; case N_EFFECT: r = setup_subexp_call(NEFFECT(node).target, env); @@ -2907,10 +3156,10 @@ next_setup(Node* node, Node* next_node, regex_t* reg) retry: type = NTYPE(node); - if (type == N_QUALIFIER) { - QualifierNode* qn = &(NQUALIFIER(node)); + if (type == N_QUANTIFIER) { + QuantifierNode* qn = &(NQUANTIFIER(node)); if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QUALIFIER_PEEK_NEXT +#ifdef USE_QUANTIFIER_PEEK_NEXT qn->next_head_exact = get_head_value_node(next_node, 1, reg); #endif /* automatic posseivation a*b ==> (?>a*)b */ @@ -2943,15 +3192,55 @@ next_setup(Node* node, Node* next_node, regex_t* reg) return 0; } + +static int +divide_ambig_string_node_sub(regex_t* reg, int prev_ambig, + UChar* prev_start, UChar* prev, + UChar* end, Node*** tailp, Node** root) +{ + UChar *tmp, *wp; + Node* snode; + + if (prev_ambig != 0) { + tmp = prev_start; + wp = prev_start; + while (tmp < prev) { + wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, + &tmp, end, wp); + } + 
snode = onig_node_new_str(prev_start, wp); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + NSTRING_SET_AMBIG(snode); + if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); + } + else { + snode = onig_node_new_str(prev_start, prev); + CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + } + + if (*tailp == (Node** )0) { + *root = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(*root, ONIGERR_MEMORY); + *tailp = &(NCONS(*root).right); + } + else { + **tailp = onig_node_new_list(snode, NULL); + CHECK_NULL_RETURN_VAL(**tailp, ONIGERR_MEMORY); + *tailp = &(NCONS(**tailp).right); + } + + return 0; +} + static int divide_ambig_string_node(Node* node, regex_t* reg) { StrNode* sn = &NSTRING(node); int ambig, prev_ambig; UChar *prev, *p, *end, *prev_start, *start, *tmp, *wp; - Node *snode; Node *root = NULL_NODE; Node **tailp = (Node** )0; + int r; start = prev_start = p = sn->s; end = sn->end; @@ -2964,33 +3253,9 @@ divide_ambig_string_node(Node* node, regex_t* reg) if (prev_ambig != (ambig = ONIGENC_IS_MBC_AMBIGUOUS(reg->enc, reg->ambig_flag, &p, end))) { - if (prev_ambig != 0) { - tmp = prev_start; - wp = prev_start; - while (tmp < prev) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); - } - snode = onig_node_new_str(prev_start, wp); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - NSTRING_SET_AMBIG(snode); - if (wp != prev) NSTRING_SET_AMBIG_REDUCE(snode); - } - else { - snode = onig_node_new_str(prev_start, prev); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - } - - if (tailp == (Node** )0) { - root = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY); - tailp = &(NCONS(root).right); - } - else { - *tailp = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY); - tailp = &(NCONS(*tailp).right); - } + r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, prev, + end, &tailp, &root); + if (r != 0) return r; prev_ambig = ambig; prev_start = prev; @@ -3011,41 +3276,157 @@ 
divide_ambig_string_node(Node* node, regex_t* reg) } } else { - if (prev_ambig != 0) { - tmp = prev_start; - wp = prev_start; - while (tmp < end) { - wp += ONIGENC_MBC_TO_NORMALIZE(reg->enc, reg->ambig_flag, - &tmp, end, wp); - } - snode = onig_node_new_str(prev_start, wp); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - NSTRING_SET_AMBIG(snode); - if (wp != end) NSTRING_SET_AMBIG_REDUCE(snode); + r = divide_ambig_string_node_sub(reg, prev_ambig, prev_start, end, + end, &tailp, &root); + if (r != 0) return r; + + swap_node(node, root); + onig_node_str_clear(root); /* should be after swap! */ + onig_node_free(root); /* free original string node */ + } + + return 0; +} + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define CEC_THRES_NUM_BIG_REPEAT 512 +#define CEC_INFINITE_NUM 0x7fffffff + +#define CEC_IN_INFINITE_REPEAT (1<<0) +#define CEC_IN_FINITE_REPEAT (1<<1) +#define CEC_CONT_BIG_REPEAT (1<<2) + +static int +setup_comb_exp_check(Node* node, int state, ScanEnv* env) +{ + int type; + int r = state; + + type = NTYPE(node); + switch (type) { + case N_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_comb_exp_check(NCONS(node).left, r, env); + prev = NCONS(node).left; + } while (r >= 0 && IS_NOT_NULL(node = NCONS(node).right)); } - else { - snode = onig_node_new_str(prev_start, end); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); + break; + + case N_ALT: + { + int ret; + do { + ret = setup_comb_exp_check(NCONS(node).left, state, env); + r |= ret; + } while (ret >= 0 && IS_NOT_NULL(node = NCONS(node).right)); } + break; - if (tailp == (Node** )0) { - root = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(root, ONIGERR_MEMORY); - tailp = &(NCONS(node).right); + case N_QUANTIFIER: + { + int child_state = state; + int add_state = 0; + QuantifierNode* qn = &(NQUANTIFIER(node)); + Node* target = qn->target; + int var_num; + + if (! 
IS_REPEAT_INFINITE(qn->upper)) { + if (qn->upper > 1) { + /* {0,1}, {1,1} are allowed */ + child_state |= CEC_IN_FINITE_REPEAT; + + /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ + if (env->backrefed_mem == 0) { + if (NTYPE(qn->target) == N_EFFECT) { + EffectNode* en = &(NEFFECT(qn->target)); + if (en->type == EFFECT_MEMORY) { + if (NTYPE(en->target) == N_QUANTIFIER) { + QuantifierNode* q = &(NQUANTIFIER(en->target)); + if (IS_REPEAT_INFINITE(q->upper) + && q->greedy == qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + if (qn->upper == 1) + child_state = state; + } + } + } + } + } + } + } + + if (state & CEC_IN_FINITE_REPEAT) { + qn->comb_exp_check_num = -1; + } + else { + if (IS_REPEAT_INFINITE(qn->upper)) { + var_num = CEC_INFINITE_NUM; + child_state |= CEC_IN_INFINITE_REPEAT; + } + else { + var_num = qn->upper - qn->lower; + } + + if (var_num >= CEC_THRES_NUM_BIG_REPEAT) + add_state |= CEC_CONT_BIG_REPEAT; + + if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || + ((state & CEC_CONT_BIG_REPEAT) != 0 && + var_num >= CEC_THRES_NUM_BIG_REPEAT)) { + if (qn->comb_exp_check_num == 0) { + env->num_comb_exp_check++; + qn->comb_exp_check_num = env->num_comb_exp_check; + if (env->curr_max_regnum > env->comb_exp_max_regnum) + env->comb_exp_max_regnum = env->curr_max_regnum; + } + } + } + + r = setup_comb_exp_check(target, child_state, env); + r |= add_state; } - else { - *tailp = onig_node_new_list(snode, NULL); - CHECK_NULL_RETURN_VAL(*tailp, ONIGERR_MEMORY); - tailp = &(NCONS(*tailp).right); + break; + + case N_EFFECT: + { + EffectNode* en = &(NEFFECT(node)); + + switch (en->type) { + case EFFECT_MEMORY: + { + if (env->curr_max_regnum < en->regnum) + env->curr_max_regnum = en->regnum; + + r = setup_comb_exp_check(en->target, state, env); + } + break; + + default: + r = setup_comb_exp_check(en->target, state, env); + break; + } } + break; - swap_node(node, root); - onig_node_str_clear(root); /* should be after swap! 
*/ - onig_node_free(root); /* free original string node */ +#ifdef USE_SUBEXP_CALL + case N_CALL: + if (IS_CALL_RECURSION(&(NCALL(node)))) + env->has_recursion = 1; + else + r = setup_comb_exp_check(NCALL(node).target, state, env); + break; +#endif + + default: + break; } - return 0; + return r; } +#endif #define IN_ALT (1<<0) #define IN_NOT (1<<1) @@ -3116,15 +3497,20 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_AT_LEVEL + if (IS_BACKREF_NEST_LEVEL(br)) { + BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + } +#endif SET_EFFECT_STATUS(nodes[p[i]], NST_MEM_BACKREFED); } } break; - case N_QUALIFIER: + case N_QUANTIFIER: { OnigDistance d; - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); Node* target = qn->target; if ((state & IN_REPEAT) != 0) { @@ -3137,7 +3523,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (d == 0) { qn->target_empty_info = NQ_TARGET_IS_EMPTY; #ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK - r = qualifiers_memory_node_info(target); + r = quantifiers_memory_node_info(target); if (r < 0) break; if (r > 0) { qn->target_empty_info = r; @@ -3179,15 +3565,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r) break; } onig_node_free(target); - break; /* break case N_QUALIFIER: */ + break; /* break case N_QUANTIFIER: */ } } } #ifdef USE_OP_PUSH_OR_JUMP_EXACT if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == N_QUALIFIER) { - QualifierNode* tqn = &(NQUALIFIER(target)); + if (NTYPE(target) == N_QUANTIFIER) { + QuantifierNode* tqn = &(NQUANTIFIER(target)); if (IS_NOT_NULL(tqn->head_exact)) { qn->head_exact = tqn->head_exact; tqn->head_exact = NULL; @@ -3227,8 +3613,8 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) { Node* target = en->target; r = 
setup_tree(target, reg, state, env); - if (NTYPE(target) == N_QUALIFIER) { - QualifierNode* tqn = &(NQUALIFIER(target)); + if (NTYPE(target) == N_QUANTIFIER) { + QuantifierNode* tqn = &(NQUANTIFIER(target)); if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ int qtype = NTYPE(tqn->target); @@ -3257,17 +3643,15 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ ( N_LIST | N_ALT | N_STRING | N_CCLASS | N_CTYPE | \ - N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUALIFIER | N_CALL ) + N_ANYCHAR | N_ANCHOR | N_EFFECT | N_QUANTIFIER | N_CALL ) #define ALLOWED_EFFECT_IN_LB ( EFFECT_MEMORY ) #define ALLOWED_EFFECT_IN_LB_NOT 0 #define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) +( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) #define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF ) - /* can't allow all anchors, because \G in look-behind through Search(). - ex. /(?<=\G)zz/.match("azz") => success. 
*/ +( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION ) case ANCHOR_LOOK_BEHIND: { @@ -3383,7 +3767,7 @@ typedef struct { static int map_position_value(OnigEncoding enc, int i) { - static short int ByteValTable[] = { + static const short int ByteValTable[] = { 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, @@ -3408,7 +3792,7 @@ static int distance_value(MinMaxLen* mm) { /* 1000 / (min-max-dist + 1) */ - static short int dist_vals[] = { + static const short int dist_vals[] = { 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 91, 83, 77, 71, 67, 63, 59, 56, 53, 50, 48, 45, 43, 42, 40, 38, 37, 36, 34, 33, @@ -3604,9 +3988,10 @@ copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) } static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add) +concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) { - int i, n; + int i, j, len; + UChar *p, *end; OptAncInfo tanc; if (! to->ignore_case && add->ignore_case) { @@ -3615,11 +4000,17 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add) to->ignore_case = 1; } - for (i = to->len, n = 0; n < add->len && i < OPT_EXACT_MAXLEN; i++, n++) - to->s[i] = add->s[n]; + p = add->s; + end = p + add->len; + for (i = to->len; p < end; ) { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) + to->s[i++] = *p++; + } to->len = i; - to->reach_end = (n == add->len ? add->reach_end : 0); + to->reach_end = (p == end ? add->reach_end : 0); concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); if (! 
to->reach_end) tanc.right_anchor = 0; @@ -3634,15 +4025,10 @@ concat_opt_exact_info_str(OptExactInfo* to, UChar *p; for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - if (raw) { + len = enc_len(enc, p); + if (i + len > OPT_EXACT_MAXLEN) break; + for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; - } - else { - len = enc_len(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; - for (j = 0; j < len; j++) - to->s[i++] = *p++; - } } to->len = i; @@ -3692,7 +4078,14 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) v1 = now->len; v2 = alt->len; - if (v1 <= 2 && v2 <= 2) { + if (v2 == 0) { + return ; + } + else if (v1 == 0) { + copy_opt_exact_info(now, alt); + return ; + } + else if (v1 <= 2 && v2 <= 2) { /* ByteValTable[x] is big value --> low price */ v2 = map_position_value(enc, now->s[0]); v1 = map_position_value(enc, alt->s[0]); @@ -3711,7 +4104,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) static void clear_opt_map_info(OptMapInfo* map) { - static OptMapInfo clean_info = { + static const OptMapInfo clean_info = { {0, 0}, {0, 0}, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -3755,11 +4148,10 @@ static int add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, OnigEncoding enc, OnigAmbigType ambig_flag) { - int i, j, n, len; + int i, n, len; UChar buf[ONIGENC_MBC_NORMALIZE_MAXLEN]; - OnigCodePoint code, ccode; - OnigCompAmbigCodes* ccs; - OnigPairAmbigCodes* pccs; + OnigCodePoint code; + const OnigPairAmbigCodes* pccs; OnigAmbigType amb; add_char_opt_map_info(map, p[0], enc); @@ -3776,21 +4168,6 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, add_char_opt_map_info(map, buf[0], enc); } } - - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - n = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs); - for (i = 0; i < n; i++) { - if (ccs[i].code == code) { - for (j = 0; j < ccs[i].n; j++) { - ccode = ccs[i].items[j].code[0]; - len = 
ONIGENC_CODE_TO_MBC(enc, ccode, buf); - if (len < 0) return len; - add_char_opt_map_info(map, buf[0], enc); - } - break; - } - } - } } return 0; } @@ -3907,11 +4284,11 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (add->exb.len > 0) { if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb); + concat_opt_exact_info(&to->exb, &add->exb, enc); clear_opt_exact_info(&add->exb); } else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb); + concat_opt_exact_info(&to->exm, &add->exb, enc); clear_opt_exact_info(&add->exb); } } @@ -4184,12 +4561,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; #endif - case N_QUALIFIER: + case N_QUANTIFIER: { int i; OnigDistance min, max; NodeOptInfo nopt; - QualifierNode* qn = &(NQUALIFIER(node)); + QuantifierNode* qn = &(NQUANTIFIER(node)); r = optimize_node_left(qn->target, &nopt, env); if (r) break; @@ -4197,8 +4574,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NTYPE(qn->target) == N_ANYCHAR && qn->greedy) { - if (IS_POSIXLINE(env->options)) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_PL); + if (IS_MULTILINE(env->options)) + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); } @@ -4210,7 +4587,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (nopt.exb.reach_end) { for (i = 2; i < qn->lower && ! 
is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb); + concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); } if (i < qn->lower) { opt->exb.reach_end = 0; @@ -4316,10 +4693,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY); reg->exact_end = reg->exact + e->len; - if (e->anc.left_anchor & ANCHOR_BEGIN_LINE) - allow_reverse = 1; - else - allow_reverse = + allow_reverse = ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end); if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { @@ -4391,7 +4765,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (r) return r; reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL); + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); @@ -4446,6 +4820,38 @@ clear_optimize_info(regex_t* reg) #ifdef ONIG_DEBUG +static void print_enc_string(FILE* fp, OnigEncoding enc, + const UChar *s, const UChar *end) +{ + fprintf(fp, "\nPATTERN: /"); + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + const UChar *p; + OnigCodePoint code; + + p = s; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + fprintf(fp, " 0x%04x ", (int )code); + } + else { + fputc((int )code, fp); + } + + p += enc_len(enc, p); + } + } + else { + while (s < end) { + fputc((int )*s, fp); + s++; + } + } + + fprintf(fp, "/\n"); +} + static void print_distance_range(FILE* f, OnigDistance a, OnigDistance b) { @@ -4503,7 +4909,7 @@ print_anchor(FILE* f, int anchor) q = 1; fprintf(f, "anychar-star"); } - if (anchor & ANCHOR_ANYCHAR_STAR_PL) { + if (anchor & ANCHOR_ANYCHAR_STAR_ML) { if (q) fprintf(f, ", "); fprintf(f, "anychar-star-pl"); } @@ -4514,8 +4920,8 @@ print_anchor(FILE* f, int anchor) static void print_optimize_info(FILE* f, regex_t* reg) { - 
static char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", - "EXACT_IC", "MAP" }; + static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV", + "EXACT_IC", "MAP" }; fprintf(f, "optimize: %s\n", on[reg->optimize]); fprintf(f, " anchor: "); print_anchor(f, reg->anchor); @@ -4624,7 +5030,6 @@ onig_chain_reduce(regex_t* reg) { regex_t *head, *prev; - THREAD_ATOMIC_START; prev = reg; head = prev->chain; if (IS_NOT_NULL(head)) { @@ -4636,7 +5041,6 @@ onig_chain_reduce(regex_t* reg) prev->chain = (regex_t* )NULL; REGEX_TRANSFER(reg, head); } - THREAD_ATOMIC_END; } #if 0 @@ -4739,6 +5143,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->state = ONIG_STATE_COMPILING; +#ifdef ONIG_DEBUG + print_enc_string(stderr, reg->enc, pattern, pattern_end); +#endif + if (reg->alloc == 0) { init_size = (pattern_end - pattern) * 2; if (init_size <= 0) init_size = COMPILE_INIT_SIZE; @@ -4753,6 +5161,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->num_null_check = 0; reg->repeat_range_alloc = 0; reg->repeat_range = (OnigRepeatRange* )NULL; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + reg->num_comb_exp_check = 0; +#endif r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; @@ -4806,6 +5217,33 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->bt_mem_end |= reg->capture_history; } +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (scan_env.backrefed_mem == 0 +#ifdef USE_SUBEXP_CALL + || scan_env.num_call == 0 +#endif + ) { + setup_comb_exp_check(root, 0, &scan_env); +#ifdef USE_SUBEXP_CALL + if (scan_env.has_recursion != 0) { + scan_env.num_comb_exp_check = 0; + } + else +#endif + if (scan_env.comb_exp_max_regnum > 0) { + int i; + for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { + if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + scan_env.num_comb_exp_check = 0; + break; + } + } + } + } + + reg->num_comb_exp_check = 
scan_env.num_comb_exp_check; +#endif + clear_optimize_info(reg); #ifndef ONIG_DONT_OPTIMIZE r = set_optimize_info_from_tree(root, reg, &scan_env); @@ -4864,6 +5302,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, err: if (IS_NOT_NULL(scan_env.error)) { if (IS_NOT_NULL(einfo)) { + einfo->enc = scan_env.enc; einfo->par = scan_env.error; einfo->par_end = scan_env.error_end; } @@ -4875,6 +5314,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, return r; } +#ifdef USE_RECOMPILE_API extern int onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, @@ -4893,6 +5333,7 @@ onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } return 0; } +#endif static int onig_inited = 0; @@ -4906,6 +5347,11 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, if (ONIGENC_IS_UNDEF(enc)) return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED; + if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) + == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_INVALID_COMBINATION_OF_OPTIONS; + } + *reg = (regex_t* )xmalloc(sizeof(regex_t)); if (IS_NULL(*reg)) return ONIGERR_MEMORY; (*reg)->state = ONIG_STATE_MODIFY; @@ -4959,13 +5405,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, } extern int -onig_init() +onig_init(void) { if (onig_inited != 0) return 0; onig_inited = 1; + THREAD_SYSTEM_INIT; THREAD_ATOMIC_START; onigenc_init(); @@ -4981,9 +5428,9 @@ onig_init() extern int -onig_end() +onig_end(void) { - extern int onig_free_shared_cclass_table(); + extern int onig_free_shared_cclass_table(void); THREAD_ATOMIC_START; @@ -4991,23 +5438,34 @@ onig_end() onig_print_statistics(stderr); #endif -#ifdef USE_RECYCLE_NODE - onig_free_node_list(); -#endif - #ifdef USE_SHARED_CCLASS_TABLE onig_free_shared_cclass_table(); #endif +#ifdef 
USE_RECYCLE_NODE + onig_free_node_list(); +#endif + onig_inited = 0; THREAD_ATOMIC_END; + THREAD_SYSTEM_END; return 0; } #ifdef ONIG_DEBUG +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_STATE_CHECK 6 + OnigOpInfoType OnigOpInfo[] = { { OP_FINISH, "finish", ARG_NON }, { OP_END, "end", ARG_NON }, @@ -5038,62 +5496,66 @@ OnigOpInfoType OnigOpInfo[] = { { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, - { OP_WORD, "word", ARG_NON }, - { OP_NOT_WORD, "not-word", ARG_NON }, - { OP_WORD_SB, "word-sb", ARG_NON }, - { OP_WORD_MB, "word-mb", ARG_NON }, - { OP_WORD_BOUND, "word-bound", ARG_NON }, - { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, - { OP_WORD_BEGIN, "word-begin", ARG_NON }, - { OP_WORD_END, "word-end", ARG_NON }, - { OP_BEGIN_BUF, "begin-buf", ARG_NON }, - { OP_END_BUF, "end-buf", ARG_NON }, - { OP_BEGIN_LINE, "begin-line", ARG_NON }, - { OP_END_LINE, "end-line", ARG_NON }, - { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, - { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREF3, "backref3", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic",ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, - { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, - { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, 
"set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start",ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, - { OP_PUSH_POS, "push-pos", ARG_NON }, - { OP_POP_POS, "pop-pos", ARG_NON }, - { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, - { OP_FAIL_POS, "fail-pos", ARG_NON }, - { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, - { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, - { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_NOT_WORD, "not-word", ARG_NON }, + { OP_WORD_BOUND, "word-bound", ARG_NON }, + { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON }, + { OP_WORD_BEGIN, "word-begin", ARG_NON }, + { OP_WORD_END, "word-end", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREFN, "backrefn", ARG_MEMNUM }, + { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_AT_LEVEL, 
"backref_at_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_POP, "pop", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, + { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, + { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_PUSH_POS, "push-pos", ARG_NON }, + { OP_POP_POS, "pop-pos", ARG_NON }, + { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, + { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, + { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, - { OP_CALL, "call", ARG_ABSADDR }, - { OP_RETURN, "return", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, + { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, + { 
OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, + { OP_STATE_CHECK_ANYCHAR_ML_STAR, + "state-check-anychar-ml*", ARG_STATE_CHECK }, { -1, "", ARG_NON } }; @@ -5152,6 +5614,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, RelAddrType addr; LengthType len; MemNumType mem; + StateCheckNumType scn; OnigCodePoint code; UChar *q; @@ -5186,6 +5649,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, fprintf(f, ":%d", option); } break; + + case ARG_STATE_CHECK: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + fprintf(f, ":%d", scn); + break; } } else { @@ -5312,6 +5781,26 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, } break; + case OP_BACKREF_AT_LEVEL: + { + OnigOptionType option; + LengthType level; + + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + case OP_REPEAT: case OP_REPEAT_NG: { @@ -5343,6 +5832,15 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, fprintf(f, ":%d:(%d)", len, addr); break; + case OP_STATE_CHECK_PUSH: + case OP_STATE_CHECK_PUSH_OR_JUMP: + scn = *((StateCheckNumType* )bp); + bp += SIZE_STATE_CHECK_NUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:(%d)", scn, addr); + break; + default: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); @@ -5497,11 +5995,11 @@ print_indent_tree(FILE* f, Node* node, int indent) break; #endif - case N_QUALIFIER: - fprintf(f, "{%d,%d}%s\n", (int )node, - NQUALIFIER(node).lower, NQUALIFIER(node).upper, - (NQUALIFIER(node).greedy ? 
"" : "?")); - print_indent_tree(f, NQUALIFIER(node).target, indent + add); + case N_QUANTIFIER: + fprintf(f, "{%d,%d}%s\n", (int )node, + NQUANTIFIER(node).lower, NQUANTIFIER(node).upper, + (NQUANTIFIER(node).greedy ? "" : "?")); + print_indent_tree(f, NQUANTIFIER(node).target, indent + add); break; case N_EFFECT: @@ -5530,7 +6028,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } - if (type != N_LIST && type != N_ALT && type != N_QUALIFIER && + if (type != N_LIST && type != N_ALT && type != N_QUANTIFIER && type != N_EFFECT) fprintf(f, "\n"); fflush(f); diff --git a/ext/mbstring/oniguruma/regenc.c b/ext/mbstring/oniguruma/regenc.c index a767ca60b6..958917e122 100644 --- a/ext/mbstring/oniguruma/regenc.c +++ b/ext/mbstring/oniguruma/regenc.c @@ -2,7 +2,7 @@ regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -32,13 +32,13 @@ OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; extern int -onigenc_init() +onigenc_init(void) { return 0; } extern OnigEncoding -onigenc_get_default_encoding() +onigenc_get_default_encoding(void) { return OnigEncDefaultCharEncoding; } @@ -175,7 +175,7 @@ onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) #define USE_APPLICATION_TO_LOWER_CASE_TABLE -unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { +const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x228c, 0x2289, 0x2288, 0x2288, 0x2288, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -251,7 +251,7 @@ static const UChar BuiltInAsciiToLowerCaseTable[] = { #endif /* not USE_APPLICATION_TO_LOWER_CASE_TABLE */ #ifdef USE_UPPER_CASE_TABLE -UChar OnigEncAsciiToUpperCaseTable[256] = { +const UChar OnigEncAsciiToUpperCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -287,7 +287,7 @@ UChar OnigEncAsciiToUpperCaseTable[256] = { }; #endif -unsigned short OnigEncAsciiCtypeTable[256] = { +const unsigned short OnigEncAsciiCtypeTable[256] = { 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x220c, 0x2209, 0x2208, 0x2208, 0x2208, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, 0x2008, @@ -323,7 +323,7 @@ unsigned short OnigEncAsciiCtypeTable[256] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }; -UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { +const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', 
'\023', '\024', '\025', '\026', '\027', @@ -359,7 +359,7 @@ UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = { }; #ifdef USE_UPPER_CASE_TABLE -UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { +const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', @@ -417,7 +417,7 @@ onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UC return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); } -OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { +const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { { 0x41, 0x61 }, { 0x42, 0x62 }, { 0x43, 0x63 }, @@ -475,7 +475,7 @@ OnigPairAmbigCodes OnigAsciiPairAmbigCodes[] = { extern int onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { if (flag == ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE) { *ccs = OnigAsciiPairAmbigCodes; @@ -488,16 +488,16 @@ onigenc_ascii_get_all_pair_ambig_codes(OnigAmbigType flag, extern int onigenc_nothing_get_all_comp_ambig_codes(OnigAmbigType flag, - OnigCompAmbigCodes** ccs) + const OnigCompAmbigCodes** ccs) { return 0; } extern int onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, - OnigPairAmbigCodes** ccs) + const OnigPairAmbigCodes** ccs) { - static OnigPairAmbigCodes cc[] = { + static const OnigPairAmbigCodes cc[] = { { 0xc0, 0xe0 }, { 0xc1, 0xe1 }, { 0xc2, 0xe2 }, @@ -577,9 +577,9 @@ onigenc_iso_8859_1_get_all_pair_ambig_codes(OnigAmbigType flag, extern int onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag, - OnigCompAmbigCodes** ccs) + const OnigCompAmbigCodes** ccs) { - static OnigCompAmbigCodes folds[] = { + static const OnigCompAmbigCodes folds[] = { { 2, 0xdf, {{ 2, { 0x53, 0x53 } }, { 2, { 0x73, 0x73} } } } }; @@ -593,7 +593,7 @@ onigenc_ess_tsett_get_all_comp_ambig_codes(OnigAmbigType flag, extern int 
onigenc_not_support_get_ctype_code_range(int ctype, - OnigCodePoint* sbr[], OnigCodePoint* mbr[]) + const OnigCodePoint* sbr[], const OnigCodePoint* mbr[]) { return ONIG_NO_SUPPORT_CONFIG; } @@ -830,10 +830,10 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) if ((code & 0xff000000) != 0) { *p++ = (UChar )((code >> 24) & 0xff); } - if ((code & 0xff0000) != 0) { + if ((code & 0xff0000) != 0 || p != buf) { *p++ = (UChar )((code >> 16) & 0xff); } - if ((code & 0xff00) != 0) { + if ((code & 0xff00) != 0 || p != buf) { *p++ = (UChar )((code >> 8) & 0xff); } *p++ = (UChar )(code & 0xff); @@ -849,40 +849,32 @@ extern int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE); - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; + } } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } extern int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype) { - if ((ctype & ONIGENC_CTYPE_WORD) != 0) { - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else + if (code < 128) + return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); + else { + if ((ctype & (ONIGENC_CTYPE_WORD | + ONIGENC_CTYPE_GRAPH | ONIGENC_CTYPE_PRINT)) != 0) { return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? 
TRUE : FALSE); - - ctype &= ~ONIGENC_CTYPE_WORD; - if (ctype == 0) return FALSE; + } } - if (code < 128) - return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype); - else - return FALSE; + return FALSE; } extern int diff --git a/ext/mbstring/oniguruma/regenc.h b/ext/mbstring/oniguruma/regenc.h index 510455146e..58ee3e7f22 100644 --- a/ext/mbstring/oniguruma/regenc.h +++ b/ext/mbstring/oniguruma/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -65,15 +65,17 @@ #else /* ONIG_RUBY_M17N */ #define USE_UNICODE_FULL_RANGE_CTYPE +/* following must not use with USE_CRNL_AS_LINE_TERMINATOR */ +/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */ #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII /* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs)); -ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs)); -ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, OnigPairAmbigCodes** acs)); -ONIG_EXTERN int onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, OnigCompAmbigCodes** acs)); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_ascii_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); +ONIG_EXTERN int onigenc_nothing_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); +ONIG_EXTERN int onigenc_iso_8859_1_get_all_pair_ambig_codes P_((OnigAmbigType flag, const OnigPairAmbigCodes** acs)); +ONIG_EXTERN int 
onigenc_ess_tsett_get_all_comp_ambig_codes P_((OnigAmbigType flag, const OnigCompAmbigCodes** acs)); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); /* methods for single byte encoding */ @@ -105,7 +107,7 @@ ONIG_EXTERN int onigenc_get_all_fold_match_code_ss_0xdf P_((OnigCodePoint** code /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoint* sbr[], OnigCodePoint* mbr[])); +ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, const OnigCodePoint* sbr[], const OnigCodePoint* mbr[])); #define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \ @@ -115,10 +117,10 @@ ONIG_EXTERN int onigenc_unicode_get_ctype_code_range P_((int ctype, OnigCodePoin #define ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code,ctype) \ ((OnigEnc_Unicode_ISO_8859_1_CtypeTable[code] & ctype) != 0) -ONIG_EXTERN UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN UChar OnigEncISO_8859_1_ToUpperCaseTable[]; -ONIG_EXTERN unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; -ONIG_EXTERN OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; +ONIG_EXTERN const unsigned short OnigEnc_Unicode_ISO_8859_1_CtypeTable[]; +ONIG_EXTERN const OnigPairAmbigCodes OnigAsciiPairAmbigCodes[]; #endif /* is not ONIG_RUBY_M17N */ @@ -133,7 +135,7 @@ extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; ONIG_EXTERN const UChar* OnigEncAsciiToLowerCaseTable; ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN unsigned short OnigEncAsciiCtypeTable[]; +ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; 
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c) OnigEncAsciiToLowerCaseTable[c] #define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c) OnigEncAsciiToUpperCaseTable[c] diff --git a/ext/mbstring/oniguruma/regerror.c b/ext/mbstring/oniguruma/regerror.c index 413b985c35..d6ec91856d 100644 --- a/ext/mbstring/oniguruma/regerror.c +++ b/ext/mbstring/oniguruma/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ #define va_init_list(a,b) va_start(a) #endif -extern char* +extern UChar* onig_error_code_to_format(int code) { char *p; - if (code >= 0) return (char* )0; + if (code >= 0) return (UChar* )0; switch (code) { case ONIG_MISMATCH: @@ -170,6 +170,8 @@ onig_error_code_to_format(int code) p = "invalid character property name {%n}"; break; case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: p = "not supported encoding combination"; break; + case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: + p = "invalid combination of options"; break; case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT: p = "over thread pass limit count"; break; @@ -177,7 +179,49 @@ onig_error_code_to_format(int code) p = "undefined error code"; break; } - return p; + return (UChar* )p; +} + + +static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, + UChar buf[], int buf_size, int *is_over) +{ + int len; + UChar *p; + OnigCodePoint code; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + p = s; + len = 0; + while (p < end) { + code = ONIGENC_MBC_TO_CODE(enc, p, end); + if (code >= 0x80) { + if (len + 5 <= buf_size) { + sprintf((char* )(&(buf[len])), "\\%03o", + (unsigned int)(code & 0377)); + len += 5; + } + else { + break; + } + } + else { + buf[len++] = (UChar )code; + } + + p += enc_len(enc, p); + if (len >= buf_size) break; + } + + *is_over = ((p < end) 
? 1 : 0); + } + else { + len = MIN((end - s), buf_size); + xmemcpy(buf, s, (size_t )len); + *is_over = ((buf_size < (end - s)) ? 1 : 0); + } + + return len; } @@ -196,7 +240,8 @@ onig_error_code_to_str(s, code, va_alist) { UChar *p, *q; OnigErrorInfo* einfo; - int len; + int len, is_over; + UChar parbuf[MAX_ERROR_PAR_LEN]; va_list vargs; va_init_list(vargs, code); @@ -210,23 +255,20 @@ onig_error_code_to_str(s, code, va_alist) case ONIGERR_INVALID_CHAR_IN_GROUP_NAME: case ONIGERR_INVALID_CHAR_PROPERTY_NAME: einfo = va_arg(vargs, OnigErrorInfo*); - len = einfo->par_end - einfo->par; + len = to_ascii(einfo->enc, einfo->par, einfo->par_end, + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { q++; if (*q == 'n') { /* '%n': name */ - if (len > MAX_ERROR_PAR_LEN) { - xmemcpy(p, einfo->par, MAX_ERROR_PAR_LEN - 3); - p += (MAX_ERROR_PAR_LEN - 3); + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { xmemcpy(p, "...", 3); p += 3; } - else { - xmemcpy(p, einfo->par, len); - p += len; - } q++; } else @@ -256,39 +298,36 @@ onig_error_code_to_str(s, code, va_alist) void #ifdef HAVE_STDARG_PROTOTYPES -onig_snprintf_with_pattern(char buf[], int bufsize, OnigEncoding enc, - char* pat, char* pat_end, char *fmt, ...) +onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, + UChar* pat, UChar* pat_end, const UChar *fmt, ...) 
#else onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) - char buf[]; + UChar buf[]; int bufsize; OnigEncoding enc; - char* pat; - char* pat_end; - const char *fmt; + UChar* pat; + UChar* pat_end; + const UChar *fmt; va_dcl #endif { int n, need, len; UChar *p, *s, *bp; - char bs[6]; + UChar bs[6]; va_list args; va_init_list(args, fmt); - n = vsnprintf(buf, bufsize, fmt, args); - if (n < 0 || n >= bufsize) { - n = bufsize - 1; - } + n = vsnprintf((char* )buf, bufsize, (const char* )fmt, args); va_end(args); need = (pat_end - pat) * 4 + 4; if (n + need < bufsize) { - strcat(buf, ": /"); + strcat((char* )buf, ": /"); s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf); p = pat; - while (p < (UChar* )pat_end) { + while (p < pat_end) { if (*p == MC_ESC(enc)) { *s++ = *p++; len = enc_len(enc, p); @@ -307,7 +346,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) int blen; while (len-- > 0) { - sprintf(bs, "\\%03o", *p++ & 0377); + sprintf((char* )bs, "\\%03o", *p++ & 0377); blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (blen-- > 0) *s++ = *bp++; @@ -316,7 +355,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprintf(bs, "\\%03o", *p++ & 0377); + sprintf((char* )bs, "\\%03o", *p++ & 0377); len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (len-- > 0) *s++ = *bp++; diff --git a/ext/mbstring/oniguruma/regexec.c b/ext/mbstring/oniguruma/regexec.c index 25d97773fb..918aa67aa8 100644 --- a/ext/mbstring/oniguruma/regexec.c +++ b/ext/mbstring/oniguruma/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,12 @@ #include "regint.h" +#ifdef USE_CRNL_AS_LINE_TERMINATOR +#define ONIGENC_IS_MBC_CRNL(enc,p,end) \ + (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ + ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end)) +#endif + #ifdef USE_CAPTURE_HISTORY static void history_tree_free(OnigCaptureTreeNode* node); @@ -70,7 +76,7 @@ history_root_free(OnigRegion* r) } static OnigCaptureTreeNode* -history_node_new() +history_node_new(void) { OnigCaptureTreeNode* node; @@ -227,7 +233,7 @@ onig_region_init(OnigRegion* region) } extern OnigRegion* -onig_region_new() +onig_region_new(void) { OnigRegion* r; @@ -300,6 +306,9 @@ typedef struct _StackType { UChar *pcode; /* byte code position */ UChar *pstr; /* string position */ UChar *pstr_prev; /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif } state; struct { int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ @@ -333,28 +342,28 @@ typedef struct _StackType { /* stack type */ /* used by normal-POP */ #define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0003 -#define STK_POS_NOT 0x0005 -/* avoided by normal-POP, but value should be small */ -#define STK_NULL_CHECK_START 0x0100 +#define STK_LOOK_BEHIND_NOT 0x0002 +#define STK_POS_NOT 0x0003 /* handled by normal-POP */ -#define STK_MEM_START 0x0200 -#define STK_MEM_END 0x0300 -#define STK_REPEAT_INC 0x0400 +#define STK_MEM_START 0x0100 +#define STK_MEM_END 0x8200 +#define STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 /* avoided by normal-POP */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 #define STK_POS 0x0500 /* used when POP-POS */ #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ #define STK_REPEAT 0x0700 #define STK_CALL_FRAME 0x0800 #define STK_RETURN 0x0900 -#define STK_MEM_END_MARK 0x0a00 -#define STK_VOID 0x0b00 /* for fill a blank */ 
-#define STK_NULL_CHECK_END 0x0c00 /* for recursive call */ +#define STK_VOID 0x0a00 /* for fill a blank */ /* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define IS_TO_VOID_TARGET(stk) \ - (((stk)->type & STK_MASK_POP_USED) || (stk)->type == STK_NULL_CHECK_START) +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ typedef struct { void* stack_p; @@ -362,16 +371,72 @@ typedef struct { OnigOptionType options; OnigRegion* region; const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +#ifdef USE_COMBINATION_EXPLOSION_CHECK + void* state_check_buff; + int state_check_buff_size; +#endif } MatchArg; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ + (msa).best_len = ONIG_MISMATCH;\ +} while (0) +#else #define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\ - (msa).stack_p = (void* )0;\ - (msa).options = (arg_option);\ - (msa).region = (arg_region);\ - (msa).start = (arg_start);\ + (msa).stack_p = (void* )0;\ + (msa).options = (arg_option);\ + (msa).region = (arg_region);\ + (msa).start = (arg_start);\ +} while (0) +#endif + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + +#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 + +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \ + if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ + unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ + offset = ((offset) * (state_num)) >> 3;\ + if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ + if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ + 
(msa).state_check_buff = (void* )xmalloc(size);\ + else \ + (msa).state_check_buff = (void* )xalloca(size);\ + xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ + (size_t )(size - (offset))); \ + (msa).state_check_buff_size = size;\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ + }\ + else {\ + (msa).state_check_buff = (void* )0;\ + (msa).state_check_buff_size = 0;\ + }\ } while (0) -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +#define MATCH_ARG_FREE(msa) do {\ + if ((msa).stack_p) xfree((msa).stack_p);\ + if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ + if ((msa).state_check_buff) xfree((msa).state_check_buff);\ + }\ +} while (0); +#else +#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) +#endif + #define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\ @@ -465,27 +530,89 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_AT(index) (stk_base + (index)) #define GET_STACK_INDEX(stk) ((stk) - stk_base) +#define STACK_PUSH_TYPE(stack_type) do {\ + STACK_ENSURE(1);\ + stk->type = (stack_type);\ + STACK_INC;\ +} while(0) + +#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) + +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define STATE_CHECK_POS(s,snum) \ + (((s) - str) * num_comb_exp_check + ((snum) - 1)) +#define STATE_CHECK_VAL(v,snum) do {\ + if (state_check_buff != NULL) {\ + int x = STATE_CHECK_POS(s,snum);\ + (v) = state_check_buff[x/8] & (1<<(x%8));\ + }\ + else (v) = 0;\ +} while(0) + + +#define ELSE_IF_STATE_CHECK_MARK(stk) \ + else if ((stk)->type == STK_STATE_CHECK_MARK) { \ + int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ + state_check_buff[x/8] |= (1<<(x%8)); \ + } + #define STACK_PUSH(stack_type,pat,s,sprev) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ 
stk->u.state.pstr = (s);\ stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = 0;\ STACK_INC;\ } while(0) #define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ + stk->u.state.state_check = 0;\ STACK_INC;\ } while(0) -#define STACK_PUSH_TYPE(stack_type) do {\ +#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ + STACK_ENSURE(1);\ + stk->type = STK_ALT;\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ + stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_STATE_CHECK(s,snum) do {\ + if (state_check_buff != NULL) {\ + STACK_ENSURE(1);\ + stk->type = STK_STATE_CHECK_MARK;\ + stk->u.state.pstr = (s);\ + stk->u.state.state_check = (snum);\ + STACK_INC;\ + }\ +} while(0) + +#else /* USE_COMBINATION_EXPLOSION_CHECK */ + +#define ELSE_IF_STATE_CHECK_MARK(stk) + +#define STACK_PUSH(stack_type,pat,s,sprev) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = (s);\ + stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) +#define STACK_PUSH_ENSURED(stack_type,pat) do {\ + stk->type = (stack_type);\ + stk->u.state.pcode = (pat);\ + STACK_INC;\ +} while(0) +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) #define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) @@ -544,7 +671,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (k > stk_base) {\ k--;\ - if ((k->type == STK_MEM_END_MARK || k->type == STK_MEM_END) \ + if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ && k->u.mem.num == (mnum)) {\ level++;\ }\ @@ -603,15 +730,18 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #ifdef ONIG_DEBUG -#define STACK_BASE_CHECK(p) \ - if ((p) < 
stk_base) goto stack_error; +#define STACK_BASE_CHECK(p, at) \ + if ((p) < stk_base) {\ + fprintf(stderr, "at %s\n", at);\ + goto stack_error;\ + } #else -#define STACK_BASE_CHECK(p) +#define STACK_BASE_CHECK(p, at) #endif #define STACK_POP_ONE do {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) #define STACK_POP do {\ @@ -619,25 +749,27 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, case STACK_POP_LEVEL_FREE:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ + ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ default:\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -650,6 +782,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ }\ @@ -658,7 +791,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, #define STACK_POP_TIL_POS_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ if (stk->type == STK_POS_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -671,13 +804,14 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ 
mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ }\ } while(0) #define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk); \ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ if (stk->type == STK_LOOK_BEHIND_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ @@ -690,6 +824,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ + ELSE_IF_STATE_CHECK_MARK(stk);\ }\ } while(0) @@ -697,7 +832,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_POS_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ @@ -712,7 +847,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType *k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ @@ -727,7 +862,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ (isnull) = (k->u.null_check.pstr == (s));\ @@ -742,7 +877,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (level == 0) {\ @@ -762,7 +897,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if 
(k->u.null_check.pstr != (s)) {\ @@ -802,7 +937,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ if (k->type == STK_NULL_CHECK_START) {\ if (k->u.null_check.num == (id)) {\ if (level == 0) {\ @@ -850,7 +985,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ if (k->u.repeat.num == (id)) {\ @@ -868,7 +1003,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end, StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k); \ + STACK_BASE_CHECK(k, "STACK_RETURN"); \ if (k->type == STK_CALL_FRAME) {\ if (level == 0) {\ (addr) = k->u.call_frame.ret_addr;\ @@ -937,6 +1072,7 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag, is_fail = 0; \ } while(0) + #define ON_STR_BEGIN(s) ((s) == str) #define ON_STR_END(s) ((s) == end) #define IS_EMPTY_STR (str == end) @@ -988,6 +1124,77 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } #endif +#ifdef USE_BACKREF_AT_LEVEL +static int mem_is_in_memp(int mem, int num, UChar* memp) +{ + int i; + MemNumType m; + + for (i = 0; i < num; i++) { + GET_MEMNUM_INC(m, memp); + if (mem == (int )m) return 1; + } + return 0; +} + +static int backref_match_at_nested_level(regex_t* reg + , StackType* top, StackType* stk_base + , int ignore_case, int ambig_flag + , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) +{ + UChar *ss, *p, *pstart, *pend = NULL_UCHARP; + int level; + StackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_START) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pstart = k->u.mem.pstr; + 
if (pend != NULL_UCHARP) { + if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ + p = pstart; + ss = *s; + + if (ignore_case != 0) { + if (string_cmp_ic(reg->enc, ambig_flag, + pstart, &ss, (int )(pend - pstart)) == 0) + return 0; /* or goto next_mem; */ + } + else { + while (p < pend) { + if (*p++ != *ss++) return 0; /* or goto next_mem; */ + } + } + + *s = ss; + return 1; + } + } + } + else if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + pend = k->u.mem.pstr; + } + } + } + k--; + } + + return 0; +} +#endif /* USE_BACKREF_AT_LEVEL */ + + #ifdef RUBY_PLATFORM typedef struct { @@ -1003,7 +1210,7 @@ trap_ensure(VALUE arg) TrapEnsureArg* ta = (TrapEnsureArg* )arg; if (ta->state == 0) { /* trap_exec() is not normal return */ - ONIG_STATE_DEC(ta->reg); + ONIG_STATE_DEC_THREAD(ta->reg); if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p) xfree(ta->stk_base); @@ -1098,14 +1305,14 @@ static int MaxStackDepth = 0; /* * :nodoc: */ -static VALUE onig_stat_print() +static VALUE onig_stat_print(void) { onig_print_statistics(stderr); return Qnil; } #endif -extern void onig_statistics_init() +extern void onig_statistics_init(void) { int i; for (i = 0; i < 256; i++) { @@ -1165,27 +1372,43 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code) } static int -code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen) +is_code_in_cc(int enclen, OnigCodePoint code, CClassNode* cc) { - unsigned int in_cc; - CClassNode* cc = (CClassNode* )node; + int found; - if (enclen == 1) { - in_cc = BITSET_AT(cc->bs, code); + if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (IS_NULL(cc->mbuf)) { + found = 0; + } + else { + found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0); + } } else { - UChar* p = ((BBuf* )(cc->mbuf))->p; - in_cc = onig_is_in_code_range(p, code); + found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1); } - if (IS_CCLASS_NOT(cc)) { - return (in_cc ? 
0 : 1); + if (IS_CCLASS_NOT(cc)) + return !found; + else + return found; +} + +extern int +onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) +{ + int len; + + if (ONIGENC_MBC_MINLEN(enc) > 1) { + len = 2; } else { - return (in_cc ? 1 : 0); + len = ONIGENC_CODE_TO_MBCLEN(enc, code); } + return is_code_in_cc(len, code, cc); } + /* matching region of POSIX API */ typedef int regoff_t; @@ -1217,6 +1440,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, StackIndex si; StackIndex *repeat_stk; StackIndex *mem_start_stk, *mem_end_stk; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int scv; + unsigned char* state_check_buff = msa->state_check_buff; + int num_comb_exp_check = reg->num_comb_exp_check; +#endif n = reg->num_repeat + reg->num_mem * 2; STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE); @@ -1270,8 +1498,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, case OP_END: STAT_OP_IN(OP_END); n = s - sstart; if (n > best_len) { - OnigRegion* region = msa->region; + OnigRegion* region; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(option)) { + if (n > msa->best_len) { + msa->best_len = n; + msa->best_s = (UChar* )sstart; + } + else + goto end_best_len; + } +#endif best_len = n; + region = msa->region; if (region) { #ifdef USE_POSIX_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { @@ -1347,6 +1586,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, #endif } /* if (region) */ } /* n > best_len */ + +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + end_best_len: +#endif STAT_OP_OUT; if (IS_FIND_CONDITION(option)) { @@ -1384,24 +1627,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, ss = s; sp = p; - exact1_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { if (*p != *q) { -#if 1 - if ((ambig_flag & 
ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; - s = ss; - p = sp; - goto exact1_ic_retry; - } - else - goto fail; -#else goto fail; -#endif } p++; q++; } @@ -1490,24 +1721,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, ss = s; sp = p; - exactn_ic_retry: len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf); DATA_ENSURE(0); q = lowbuf; while (len-- > 0) { if (*p != *q) { -#if 1 - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; - s = ss; - p = sp; - goto exactn_ic_retry; - } - else - goto fail; -#else goto fail; -#endif } p++; q++; } @@ -1739,8 +1958,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, mb_len = enc_len(encode, s); ss = s; s += mb_len; + DATA_ENSURE(0); code = ONIGENC_MBC_TO_CODE(encode, ss, s); - if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail; + if (is_code_in_cc(mb_len, code, node) == 0) goto fail; } STAT_OP_OUT; break; @@ -1826,6 +2046,47 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STAT_OP_OUT; break; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR); + GET_STATE_CHECK_NUM_INC(mem, p); + while (s < end) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enc_len(encode, s); + DATA_ENSURE(n); + if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; + sprev = s; + s += n; + } + STAT_OP_OUT; + break; + + case OP_STATE_CHECK_ANYCHAR_ML_STAR: + STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); + + GET_STATE_CHECK_NUM_INC(mem, p); + while (s < end) { + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); + n = enc_len(encode, s); + if (n > 1) { + DATA_ENSURE(n); + sprev = s; + s += n; + } + else { + sprev = s; + s++; + } + } + STAT_OP_OUT; 
+ break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + case OP_WORD: STAT_OP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) @@ -1946,6 +2207,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STAT_OP_OUT; continue; } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + STAT_OP_OUT; + continue; + } +#endif goto fail; break; @@ -1966,6 +2233,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, STAT_OP_OUT; continue; } +#ifdef USE_CRNL_AS_LINE_TERMINATOR + else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { + UChar* ss = s + enc_len(encode, s); + if (ON_STR_END(ss + enc_len(encode, ss))) { + STAT_OP_OUT; + continue; + } + } +#endif goto fail; break; @@ -2041,11 +2317,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, goto backref; break; - case OP_BACKREF3: STAT_OP_IN(OP_BACKREF3); - mem = 3; - goto backref; - break; - case OP_BACKREFN: STAT_OP_IN(OP_BACKREFN); GET_MEMNUM_INC(mem, p); backref: @@ -2188,6 +2459,35 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, continue; } break; + +#ifdef USE_BACKREF_AT_LEVEL + case OP_BACKREF_AT_LEVEL: + { + int len; + OnigOptionType ic; + LengthType level; + + GET_OPTION_INC(ic, p); + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + sprev = s; + if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag + , (int )level, (int )tlen, p, &s, end)) { + while (sprev + (len = enc_len(encode, sprev)) < s) + sprev += len; + + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + STAT_OP_OUT; + continue; + } + + break; +#endif case OP_SET_OPTION_PUSH: STAT_OP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); @@ -2309,6 +2609,43 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, continue; break; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + case OP_STATE_CHECK_PUSH: STAT_OP_IN(OP_STATE_CHECK_PUSH); + 
GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + GET_RELADDR_INC(addr, p); + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + STAT_OP_OUT; + continue; + break; + + case OP_STATE_CHECK_PUSH_OR_JUMP: STAT_OP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); + GET_STATE_CHECK_NUM_INC(mem, p); + GET_RELADDR_INC(addr, p); + STATE_CHECK_VAL(scv, mem); + if (scv) { + p += addr; + } + else { + STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); + } + STAT_OP_OUT; + continue; + break; + + case OP_STATE_CHECK: STAT_OP_IN(OP_STATE_CHECK); + GET_STATE_CHECK_NUM_INC(mem, p); + STATE_CHECK_VAL(scv, mem); + if (scv) goto fail; + + STACK_PUSH_STATE_CHECK(s, mem); + STAT_OP_OUT; + continue; + break; +#endif /* USE_COMBINATION_EXPLOSION_CHECK */ + case OP_POP: STAT_OP_IN(OP_POP); STACK_POP_ONE; STAT_OP_OUT; @@ -2383,7 +2720,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, repeat_inc: stkp->u.repeat.count++; - if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { + if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) { /* end of repeat. Nothing to do. 
*/ } else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { @@ -2413,8 +2750,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, repeat_inc_ng: stkp->u.repeat.count++; - if (stkp->u.repeat.count < reg->repeat_range[mem].upper || - IS_REPEAT_INFINITE(reg->repeat_range[mem].upper)) { + if (stkp->u.repeat.count < reg->repeat_range[mem].upper) { if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) { UChar* pcode = stkp->u.repeat.pcode; @@ -2543,6 +2879,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart, p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + if (stk->u.state.state_check != 0) { + stk->type = STK_STATE_CHECK_MARK; + stk++; + } +#endif + STAT_OP_OUT; continue; break; @@ -2618,20 +2962,12 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag, tsave = t; psave = p; - retry: while (t < tend) { lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf); q = lowbuf; while (lowlen > 0) { if (*t++ != *q++) { - if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND; - t = tsave; - p = psave; - goto retry; - } - else - return 0; + return 0; } lowlen--; } @@ -2727,66 +3063,56 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, const UChar* text, const UChar* text_end, const UChar* text_range) { - const UChar *s, *t, *p, *end; + const UChar *s, *se, *t, *p, *end; const UChar *tail; - int skip; + int skip, tlen1; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", (int )text, (int )text_end, (int )text_range); #endif - end = text_range + (target_end - target) - 1; - if (end > text_end) - end = text_end; - tail = target_end - 1; + tlen1 = tail - target; + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; + s = text; - while ((s - text) < target_end - 
target) { - s += enc_len(reg->enc, s); - } - s--; /* set to text check tail position. */ if (IS_NULL(reg->int_map)) { while (s < end) { - p = s; + p = se = s + tlen1; t = tail; while (t >= target && *p == *t) { - p--; t--; + p--; t--; } - if (t < target) return (UChar* )(p + 1); + if (t < target) return (UChar* )s; - skip = reg->map[*s]; - p = s + 1; - if (p >= text_end) return (UChar* )NULL; - t = p; + skip = reg->map[*se]; + t = s; do { - p += enc_len(reg->enc, p); - } while ((p - t) < skip && p < text_end); - - s += (p - t); + s += enc_len(reg->enc, s); + } while ((s - t) < skip && s < end); } } else { while (s < end) { - p = s; + p = se = s + tlen1; t = tail; while (t >= target && *p == *t) { - p--; t--; + p--; t--; } - if (t < target) return (UChar* )(p + 1); + if (t < target) return (UChar* )s; - skip = reg->int_map[*s]; - p = s + 1; - if (p >= text_end) return (UChar* )NULL; - t = p; + skip = reg->int_map[*se]; + t = s; do { - p += enc_len(reg->enc, p); - } while ((p - t) < skip && p < text_end); - - s += (p - t); + s += enc_len(reg->enc, s); + } while ((s - t) < skip && s < end); } } + return (UChar* )NULL; } @@ -2915,7 +3241,9 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On UChar *prev; MatchArg msa; -#ifdef USE_MULTI_THREAD_SYSTEM +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { ONIG_STATE_INC(reg); if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { @@ -2924,17 +3252,27 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } } else { - int n = 0; + int n; + + THREAD_ATOMIC_END; + n = 0; while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - ONIG_STATE_INC(reg); + goto start; } -#endif /* USE_MULTI_THREAD_SYSTEM */ + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM 
*/ MATCH_ARG_INIT(msa, option, region, at); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = at - str; + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif if (region #ifdef USE_POSIX_REGION_OPTION @@ -2952,7 +3290,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On } MATCH_ARG_FREE(msa); - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); return r; } @@ -3029,7 +3367,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (prev && ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) goto retry_gate; } - else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) goto retry_gate; break; } @@ -3132,7 +3474,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, switch (reg->sub_anchor) { case ANCHOR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { - prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); + prev = onigenc_get_prev_char_head(reg->enc, str, p); if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; @@ -3149,7 +3491,11 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, goto retry; } } - else if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end)) { + else if (! ONIGENC_IS_MBC_NEWLINE(reg->enc, p, end) +#ifdef USE_CRNL_AS_LINE_TERMINATOR + && ! 
ONIGENC_IS_MBC_CRNL(reg->enc, p, end) +#endif + ) { p = onigenc_get_prev_char_head(reg->enc, adjrange, p); if (IS_NULL(p)) goto fail; goto retry; @@ -3187,8 +3533,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, int r; UChar *s, *prev; MatchArg msa; + const UChar *orig_start = start; -#ifdef USE_MULTI_THREAD_SYSTEM +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) + start: + THREAD_ATOMIC_START; if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) { ONIG_STATE_INC(reg); if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) { @@ -3197,15 +3546,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { - int n = 0; + int n; + + THREAD_ATOMIC_END; + n = 0; while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) { if (++n > THREAD_PASS_LIMIT_COUNT) return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT; THREAD_PASS; } - ONIG_STATE_INC(reg); + goto start; } -#endif /* USE_MULTI_THREAD_SYSTEM */ + THREAD_ATOMIC_END; +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -3224,16 +3577,31 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK \ r = match_at(reg, str, end, s, prev, &msa);\ if (r != ONIG_MISMATCH) {\ - if (r >= 0) goto match;\ - goto finish; /* error */ \ + if (r >= 0) {\ + if (! 
IS_FIND_LONGEST(reg->options)) {\ + goto match;\ + }\ + }\ + else goto finish; /* error */ \ } +#else +#define MATCH_AND_RETURN_CHECK \ + r = match_at(reg, str, end, s, prev, &msa);\ + if (r != ONIG_MISMATCH) {\ + if (r >= 0) {\ + goto match;\ + }\ + else goto finish; /* error */ \ + } +#endif /* anchor optimize: resume search range */ if (reg->anchor != 0 && str < end) { - UChar* semi_end; + UChar *min_semi_end, *max_semi_end; if (reg->anchor & ANCHOR_BEGIN_POSITION) { /* search start-position only */ @@ -3259,58 +3627,67 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else if (reg->anchor & ANCHOR_END_BUF) { - semi_end = (UChar* )end; + min_semi_end = max_semi_end = (UChar* )end; end_buf: - if ((OnigDistance )(semi_end - str) < reg->anchor_dmin) + if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin) goto mismatch_no_msa; if (range > start) { - if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) { - start = semi_end - reg->anchor_dmax; + if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) { + start = min_semi_end - reg->anchor_dmax; if (start < end) start = onigenc_get_right_adjust_char_head(reg->enc, str, start); else { /* match with empty at end */ start = onigenc_get_prev_char_head(reg->enc, str, end); } } - if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) { - range = semi_end - reg->anchor_dmin + 1; + if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) { + range = max_semi_end - reg->anchor_dmin + 1; } if (start >= range) goto mismatch_no_msa; } else { - if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) { - range = semi_end - reg->anchor_dmax; + if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) { + range = min_semi_end - reg->anchor_dmax; } - if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) { - start = semi_end - reg->anchor_dmin; + if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) { + start = max_semi_end - reg->anchor_dmin; start = 
ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start); - if (range > start) goto mismatch_no_msa; } + if (range > start) goto mismatch_no_msa; } } else if (reg->anchor & ANCHOR_SEMI_END_BUF) { UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); + max_semi_end = (UChar* )end; if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) { - semi_end = pre_end; - if (semi_end > str && start <= semi_end) { + min_semi_end = pre_end; + +#ifdef USE_CRNL_AS_LINE_TERMINATOR + pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, 1); + if (IS_NOT_NULL(pre_end) && + ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) { + min_semi_end = pre_end; + } +#endif + if (min_semi_end > str && start <= min_semi_end) { goto end_buf; } } else { - semi_end = (UChar* )end; + min_semi_end = (UChar* )end; goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_PL)) { + else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { goto begin_position; } } else if (str == end) { /* empty string */ - static const UChar* address_for_empty_string = ""; + static const UChar* address_for_empty_string = (UChar* )""; #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, "onig_search: empty string.\n"); @@ -3322,6 +3699,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, prev = (UChar* )NULL; MATCH_ARG_INIT(msa, option, region, start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + msa.state_check_buff = (void* )0; + msa.state_check_buff_size = 0; +#endif MATCH_AND_RETURN_CHECK; goto mismatch; } @@ -3333,7 +3714,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, option, region, start); + MATCH_ARG_INIT(msa, option, region, orig_start); +#ifdef USE_COMBINATION_EXPLOSION_CHECK + { + int offset = (MIN(start, range) - str); + STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); + } +#endif s = (UChar* )start; if (range > start) { /* forward search */ @@ -3398,7 +3785,11 @@ onig_search(regex_t* reg, 
const UChar* str, const UChar* end, MATCH_AND_RETURN_CHECK; prev = s; s += enc_len(reg->enc, s); - } while (s <= range); /* exec s == range, because empty match with /$/. */ + } while (s < range); + + if (s == range) { /* because empty match with /$/. */ + MATCH_AND_RETURN_CHECK; + } } else { /* backward search */ if (reg->optimize != ONIG_OPTIMIZE_NONE) { @@ -3457,11 +3848,19 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } mismatch: +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (IS_FIND_LONGEST(reg->options)) { + if (msa.best_len >= 0) { + s = msa.best_s; + goto match; + } + } +#endif r = ONIG_MISMATCH; finish: MATCH_ARG_FREE(msa); - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ @@ -3482,7 +3881,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, mismatch_no_msa: r = ONIG_MISMATCH; finish_no_msa: - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); #ifdef ONIG_DEBUG if (r != ONIG_MISMATCH) fprintf(stderr, "onig_search: error %d\n", r); @@ -3490,7 +3889,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, return r; match: - ONIG_STATE_DEC(reg); + ONIG_STATE_DEC_THREAD(reg); MATCH_ARG_FREE(msa); return s - str; } diff --git a/ext/mbstring/oniguruma/regext.c b/ext/mbstring/oniguruma/regext.c index 6839708be7..f5ad1f35a2 100755 --- a/ext/mbstring/oniguruma/regext.c +++ b/ext/mbstring/oniguruma/regext.c @@ -2,7 +2,7 @@ regext.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -194,6 +194,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, return r; } +#ifdef USE_RECOMPILE_API extern int onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) @@ -211,3 +212,4 @@ onig_recompile_deluxe(regex_t* reg, const UChar* pattern, const UChar* pattern_e } return 0; } +#endif diff --git a/ext/mbstring/oniguruma/reggnu.c b/ext/mbstring/oniguruma/reggnu.c index 70e8582ff2..248957c9d9 100644 --- a/ext/mbstring/oniguruma/reggnu.c +++ b/ext/mbstring/oniguruma/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -93,6 +93,7 @@ re_compile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) return r; } +#ifdef USE_RECOMPILE_API extern int re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) { @@ -113,6 +114,7 @@ re_recompile_pattern(const char* pattern, int size, regex_t* reg, char* ebuf) } return r; } +#endif extern void re_free_pattern(regex_t* reg) @@ -151,16 +153,16 @@ re_mbcinit(int mb_code) OnigEncoding enc; switch (mb_code) { - case MBCTYPE_ASCII: + case RE_MBCTYPE_ASCII: enc = ONIG_ENCODING_ASCII; break; - case MBCTYPE_EUC: + case RE_MBCTYPE_EUC: enc = ONIG_ENCODING_EUC_JP; break; - case MBCTYPE_SJIS: + case RE_MBCTYPE_SJIS: enc = ONIG_ENCODING_SJIS; break; - case MBCTYPE_UTF8: + case RE_MBCTYPE_UTF8: enc = ONIG_ENCODING_UTF8; break; default: diff --git a/ext/mbstring/oniguruma/regint.h b/ext/mbstring/oniguruma/regint.h index 11f3c43af9..d6819d8f94 100644 --- a/ext/mbstring/oniguruma/regint.h +++ b/ext/mbstring/oniguruma/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular 
expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -62,10 +62,16 @@ #define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +/* #define USE_RECOMPILE_API */ +/* treat \r\n as line terminator. + !!! NO SUPPORT !!! + use this configuration on your own responsibility */ +/* #define USE_CRNL_AS_LINE_TERMINATOR */ + /* internal config */ #define USE_RECYCLE_NODE #define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QUALIFIER_PEEK_NEXT +#define USE_QUANTIFIER_PEEK_NEXT #define USE_ST_HASH_TABLE #define USE_SHARED_CCLASS_TABLE @@ -75,64 +81,78 @@ /* interface to external system */ #ifdef NOT_RUBY /* given from Makefile */ #include "config.h" +#define USE_BACKREF_AT_LEVEL #define USE_CAPTURE_HISTORY #define USE_VARIABLE_META_CHARS #define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */ #define USE_POSIX_REGION_OPTION /* needed for POSIX API support */ +#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE +/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ /* #define USE_MULTI_THREAD_SYSTEM */ +#define THREAD_SYSTEM_INIT /* depend on thread system */ +#define THREAD_SYSTEM_END /* depend on thread system */ #define THREAD_ATOMIC_START /* depend on thread system */ #define THREAD_ATOMIC_END /* depend on thread system */ #define THREAD_PASS /* depend on thread system */ -#define CHECK_INTERRUPT /* depend on application */ #define xmalloc malloc #define xrealloc realloc #define xcalloc calloc #define xfree free #else #include "ruby.h" -#include "version.h" #include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */ +#define USE_COMBINATION_EXPLOSION_CHECK /* (X*)* */ #define USE_MULTI_THREAD_SYSTEM +#define THREAD_SYSTEM_INIT 
+#define THREAD_SYSTEM_END #define THREAD_ATOMIC_START DEFER_INTS #define THREAD_ATOMIC_END ENABLE_INTS #define THREAD_PASS rb_thread_schedule() -#define CHECK_INTERRUPT do {\ - if (rb_trap_pending) {\ - if (! rb_prohibit_interrupt) {\ - rb_trap_exec();\ - }\ - }\ -} while (0) -#define DEFAULT_WARN_FUNCTION rb_warn -#define DEFAULT_VERB_WARN_FUNCTION rb_warning +#define DEFAULT_WARN_FUNCTION onig_rb_warn +#define DEFAULT_VERB_WARN_FUNCTION onig_rb_warning #endif /* else NOT_RUBY */ +#define STATE_CHECK_STRING_THRESHOLD_LEN 7 +#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 + #define THREAD_PASS_LIMIT_COUNT 8 #define xmemset memset #define xmemcpy memcpy #define xmemmove memmove #if defined(_WIN32) && !defined(__GNUC__) #define xalloca _alloca -#ifdef NOT_RUBY #if _MSC_VER < 1500 -# define vsnprintf _vsnprintf +#ifndef vsnprintf +#define vsnprintf _vsnprintf #endif #endif #else #define xalloca alloca #endif +#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM) +#define ONIG_STATE_INC(reg) (reg)->state++ +#define ONIG_STATE_DEC(reg) (reg)->state-- -#ifdef USE_MULTI_THREAD_SYSTEM -#define ONIG_STATE_INC(reg) (reg)->state++ -#define ONIG_STATE_DEC(reg) (reg)->state-- +#define ONIG_STATE_INC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state++;\ + THREAD_ATOMIC_END;\ +} while(0) +#define ONIG_STATE_DEC_THREAD(reg) do {\ + THREAD_ATOMIC_START;\ + (reg)->state--;\ + THREAD_ATOMIC_END;\ +} while(0) #else -#define ONIG_STATE_INC(reg) /* Nothing */ -#define ONIG_STATE_DEC(reg) /* Nothing */ -#endif /* USE_MULTI_THREAD_SYSTEM */ +#define ONIG_STATE_INC(reg) /* Nothing */ +#define ONIG_STATE_DEC(reg) /* Nothing */ +#define ONIG_STATE_INC_THREAD(reg) /* Nothing */ +#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */ +#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */ #define onig_st_is_member st_is_member @@ -209,6 +229,10 @@ #endif #endif +#ifdef __BORLANDC__ +#include +#endif + #ifdef ONIG_DEBUG # include #endif @@ -232,7 +256,8 @@ #define NULL_UCHARP 
((UChar* )0) #ifndef PLATFORM_UNALIGNED_WORD_ACCESS -#define WORD_ALIGNMENT_SIZE SIZEOF_LONG +/* sizeof(OnigCodePoint) */ +#define WORD_ALIGNMENT_SIZE SIZEOF_LONG #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ (pad_size) = WORD_ALIGNMENT_SIZE \ @@ -520,7 +545,7 @@ typedef struct _BBuf { #define ANCHOR_LOOK_BEHIND_NOT (1<<13) #define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_PL (1<<15) /* ".*" optimize info (posix-line) */ +#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ /* operation code */ enum OpCode { @@ -560,8 +585,6 @@ enum OpCode { OP_WORD, OP_NOT_WORD, - OP_WORD_SB, - OP_WORD_MB, OP_WORD_BOUND, OP_NOT_WORD_BOUND, OP_WORD_BEGIN, @@ -576,11 +599,11 @@ enum OpCode { OP_BACKREF1, OP_BACKREF2, - OP_BACKREF3, OP_BACKREFN, OP_BACKREFN_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, + OP_BACKREF_AT_LEVEL, /* \k, \k */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -620,34 +643,33 @@ enum OpCode { OP_FAIL_LOOK_BEHIND_NOT, /* (? 
*/ - OP_RETURN -}; + OP_RETURN, -/* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 + OP_STATE_CHECK_PUSH, /* combination explosion check and push */ + OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ + OP_STATE_CHECK, /* check only */ + OP_STATE_CHECK_ANYCHAR_STAR, + OP_STATE_CHECK_ANYCHAR_ML_STAR +}; typedef int RelAddrType; typedef int AbsAddrType; typedef int LengthType; typedef int RepeatNumType; typedef short int MemNumType; +typedef short int StateCheckNumType; typedef void* PointerType; -#define SIZE_OPCODE 1 -#define SIZE_RELADDR sizeof(RelAddrType) -#define SIZE_ABSADDR sizeof(AbsAddrType) -#define SIZE_LENGTH sizeof(LengthType) -#define SIZE_MEMNUM sizeof(MemNumType) -#define SIZE_REPEATNUM sizeof(RepeatNumType) -#define SIZE_OPTION sizeof(OnigOptionType) -#define SIZE_CODE_POINT sizeof(OnigCodePoint) -#define SIZE_POINTER sizeof(PointerType) +#define SIZE_OPCODE 1 +#define SIZE_RELADDR sizeof(RelAddrType) +#define SIZE_ABSADDR sizeof(AbsAddrType) +#define SIZE_LENGTH sizeof(LengthType) +#define SIZE_MEMNUM sizeof(MemNumType) +#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) +#define SIZE_REPEATNUM sizeof(RepeatNumType) +#define SIZE_OPTION sizeof(OnigOptionType) +#define SIZE_CODE_POINT sizeof(OnigCodePoint) +#define SIZE_POINTER sizeof(PointerType) #ifdef PLATFORM_UNALIGNED_WORD_ACCESS @@ -673,6 +695,7 @@ typedef void* PointerType; #define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) #define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) +#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) /* code point's address must be aligned address. 
*/ #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) @@ -715,6 +738,12 @@ typedef void* PointerType; #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) #define SIZE_OP_RETURN SIZE_OPCODE +#ifdef USE_COMBINATION_EXPLOSION_CHECK +#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) +#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#endif #define MC_ESC(enc) (enc)->meta_char_table.esc #define MC_ANYCHAR(enc) (enc)->meta_char_table.anychar @@ -723,6 +752,11 @@ typedef void* PointerType; #define MC_ONE_OR_MORE_TIME(enc) (enc)->meta_char_table.one_or_more_time #define MC_ANYCHAR_ANYTIME(enc) (enc)->meta_char_table.anychar_anytime +#define IS_MC_ESC_CODE(code, enc, syn) \ + ((code) == MC_ESC(enc) && \ + !IS_SYNTAX_OP2((syn), ONIG_SYN_OP2_INEFFECTIVE_ESCAPE)) + + #define SYN_POSIX_COMMON_OP \ ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_POSIX_BRACKET | \ ONIG_SYN_OP_DECIMAL_BACKREF | \ @@ -783,13 +817,14 @@ extern void onig_print_statistics P_((FILE* f)); #endif #endif -extern char* onig_error_code_to_format P_((int code)); -extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...)); +extern UChar* onig_error_code_to_format P_((int code)); +extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...)); extern int onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax)); extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_chain_reduce P_((regex_t* reg)); extern void onig_chain_link_add P_((regex_t* to, 
regex_t* add)); extern void onig_transfer P_((regex_t* to, regex_t* from)); +extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); #endif /* REGINT_H */ diff --git a/ext/mbstring/oniguruma/regparse.c b/ext/mbstring/oniguruma/regparse.c index 58e122f486..af5c8593e6 100644 --- a/ext/mbstring/oniguruma/regparse.c +++ b/ext/mbstring/oniguruma/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +60,20 @@ OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; extern void onig_null_warn(const char* s) { } +#ifdef RUBY_PLATFORM +extern void +onig_rb_warn(const char* s) +{ + rb_warn("%s", s); +} + +extern void +onig_rb_warning(const char* s) +{ + rb_warning("%s", s); +} +#endif + #ifdef DEFAULT_WARN_FUNCTION static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; #else @@ -305,6 +319,88 @@ typedef struct { #include "st.h" +typedef struct { + unsigned char* s; + unsigned char* end; +} st_strend_key; + +static int strend_cmp(st_strend_key*, st_strend_key*); +static int strend_hash(st_strend_key*); + +static struct st_hash_type type_strend_hash = { + strend_cmp, + strend_hash, +}; + +static st_table* +onig_st_init_strend_table_with_size(int size) +{ + return onig_st_init_table_with_size(&type_strend_hash, size); +} + +static int +onig_st_lookup_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t *value) +{ + st_strend_key key; + + key.s = (unsigned char* )str_key; + key.end = (unsigned char* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +onig_st_insert_strend(st_table *table, const UChar* str_key, const UChar* end_key, st_data_t value) +{ + st_strend_key* key; + int result; + + key = 
(st_strend_key* )xmalloc(sizeof(st_strend_key)); + key->s = (unsigned char* )str_key; + key->end = (unsigned char* )end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + +static int +strend_cmp(st_strend_key* x, st_strend_key* y) +{ + unsigned char *p, *q; + int c; + + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +strend_hash(st_strend_key* x) +{ + int val; + unsigned char *p; + + val = 0; + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + return val + (val >> 5); +} + typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -350,8 +446,10 @@ onig_print_names(FILE* fp, regex_t* reg) static int i_free_name_entry(UChar* key, NameEntry* e, void* arg) { - xfree(e->name); /* == key */ + xfree(e->name); if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); + xfree(key); + xfree(e); return ST_DELETE; } @@ -801,6 +899,23 @@ onig_number_of_names(regex_t* reg) } #endif /* else USE_NAMED_GROUP */ +extern int +onig_noname_group_capture_is_active(regex_t* reg) +{ + if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) + return 0; + +#ifdef USE_NAMED_GROUP + if (onig_number_of_names(reg) > 0 && + IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + return 0; + } +#endif + + return 1; +} + #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 @@ -825,6 +940,13 @@ scan_env_clear(ScanEnv* env) for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) env->mem_nodes_static[i] = NULL_NODE; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + env->num_comb_exp_check = 0; + env->comb_exp_max_regnum = 0; + env->curr_max_regnum = 0; + env->has_recursion = 0; +#endif } static int @@ -929,9 +1051,9 @@ onig_node_free(Node* node) } break; - 
case N_QUALIFIER: - if (NQUALIFIER(node).target) - onig_node_free(NQUALIFIER(node).target); + case N_QUANTIFIER: + if (NQUANTIFIER(node).target) + onig_node_free(NQUANTIFIER(node).target); break; case N_EFFECT: @@ -966,34 +1088,35 @@ onig_node_free(Node* node) #ifdef USE_RECYCLE_NODE extern int -onig_free_node_list() +onig_free_node_list(void) { FreeNode* n; - THREAD_ATOMIC_START; - while (FreeNodeList) { + /* THREAD_ATOMIC_START; */ + while (IS_NOT_NULL(FreeNodeList)) { n = FreeNodeList; FreeNodeList = FreeNodeList->next; xfree(n); } - THREAD_ATOMIC_END; + /* THREAD_ATOMIC_END; */ return 0; } #endif static Node* -node_new() +node_new(void) { Node* node; #ifdef USE_RECYCLE_NODE + THREAD_ATOMIC_START; if (IS_NOT_NULL(FreeNodeList)) { - THREAD_ATOMIC_START; node = (Node* )FreeNodeList; FreeNodeList = FreeNodeList->next; THREAD_ATOMIC_END; return node; } + THREAD_ATOMIC_END; #endif node = (Node* )xmalloc(sizeof(Node)); @@ -1010,7 +1133,7 @@ initialize_cclass(CClassNode* cc) } static Node* -node_new_cclass() +node_new_cclass(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1020,9 +1143,9 @@ node_new_cclass() return node; } -extern Node* +static Node* node_new_cclass_by_codepoint_range(int not, - OnigCodePoint sbr[], OnigCodePoint mbr[]) + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) { CClassNode* cc; int n, i, j; @@ -1079,7 +1202,7 @@ node_new_ctype(int type) } static Node* -node_new_anychar() +node_new_anychar(void) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1128,7 +1251,11 @@ onig_node_new_anchor(int type) } static Node* -node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) +node_new_backref(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_AT_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) { int i; Node* node = node_new(); @@ -1141,6 +1268,13 @@ node_new_backref(int back_num, int* backrefs, int by_name, ScanEnv* env) if (by_name != 0) NBACKREF(node).state |= NST_NAME_REF; +#ifdef 
USE_BACKREF_AT_LEVEL + if (exist_level != 0) { + NBACKREF(node).state |= NST_NEST_LEVEL; + NBACKREF(node).nest_level = nest_level; + } +#endif + for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { @@ -1184,21 +1318,27 @@ node_new_call(UChar* name, UChar* name_end) #endif static Node* -node_new_qualifier(int lower, int upper, int by_number) +node_new_quantifier(int lower, int upper, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - node->type = N_QUALIFIER; - NQUALIFIER(node).state = 0; - NQUALIFIER(node).target = NULL; - NQUALIFIER(node).lower = lower; - NQUALIFIER(node).upper = upper; - NQUALIFIER(node).greedy = 1; - NQUALIFIER(node).by_number = by_number; - NQUALIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQUALIFIER(node).head_exact = NULL_NODE; - NQUALIFIER(node).next_head_exact = NULL_NODE; - NQUALIFIER(node).is_refered = 0; + node->type = N_QUANTIFIER; + NQUANTIFIER(node).state = 0; + NQUANTIFIER(node).target = NULL; + NQUANTIFIER(node).lower = lower; + NQUANTIFIER(node).upper = upper; + NQUANTIFIER(node).greedy = 1; + NQUANTIFIER(node).target_empty_info = NQ_TARGET_ISNOT_EMPTY; + NQUANTIFIER(node).head_exact = NULL_NODE; + NQUANTIFIER(node).next_head_exact = NULL_NODE; + NQUANTIFIER(node).is_refered = 0; + if (by_number != 0) + NQUANTIFIER(node).state |= NST_BY_NUMBER; + +#ifdef USE_COMBINATION_EXPLOSION_CHECK + NQUANTIFIER(node).comb_exp_check_num = 0; +#endif + return node; } @@ -1341,6 +1481,7 @@ onig_node_new_str(const UChar* s, const UChar* end) return node_new_str(s, end); } +#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG static Node* node_new_str_raw(UChar* s, UChar* end) { @@ -1348,20 +1489,21 @@ node_new_str_raw(UChar* s, UChar* end) NSTRING_SET_RAW(node); return node; } +#endif static Node* -node_new_empty() +node_new_empty(void) { return node_new_str(NULL, NULL); } static Node* -node_new_str_raw_char(UChar c) +node_new_str_char(UChar c) { UChar p[1]; p[0] = c; - 
return node_new_str_raw(p, p + 1); + return node_new_str(p, p + 1); } static Node* @@ -1391,6 +1533,24 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc) return 0; } +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR +static int +node_str_head_pad(StrNode* sn, int num, UChar val) +{ + UChar buf[NODE_STR_BUF_SIZE]; + int i, len; + + len = sn->end - sn->s; + onig_strcpy(buf, sn->s, sn->end); + onig_strcpy(&(sn->s[num]), buf, buf + len); + sn->end += num; + + for (i = 0; i < num; i++) { + sn->s[i] = val; + } +} +#endif + extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) { @@ -1784,29 +1944,6 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) return 0; } -static int -clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) -{ - BBuf *tbuf; - int r; - - if (IS_CCLASS_NOT(cc)) { - bitset_invert(cc->bs); - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - r = not_code_range_buf(enc, cc->mbuf, &tbuf); - if (r != 0) return r; - - bbuf_free(cc->mbuf); - cc->mbuf = tbuf; - } - - CCLASS_CLEAR_NOT(cc); - } - - return 0; -} - static int and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) { @@ -1949,7 +2086,7 @@ conv_backslash_value(int c, ScanEnv* env) } static int -is_invalid_qualifier_target(Node* node) +is_invalid_quantifier_target(Node* node) { switch (NTYPE(node)) { case N_ANCHOR: @@ -1958,19 +2095,19 @@ is_invalid_qualifier_target(Node* node) case N_EFFECT: if (NEFFECT(node).type == EFFECT_OPTION) - return is_invalid_qualifier_target(NEFFECT(node).target); + return is_invalid_quantifier_target(NEFFECT(node).target); break; case N_LIST: /* ex. (?:\G\A)* */ do { - if (! is_invalid_qualifier_target(NCONS(node).left)) return 0; + if (! is_invalid_quantifier_target(NCONS(node).left)) return 0; } while (IS_NOT_NULL(node = NCONS(node).right)); return 0; break; case N_ALT: /* ex. 
(?:abc|\A)* */ do { - if (is_invalid_qualifier_target(NCONS(node).left)) return 1; + if (is_invalid_quantifier_target(NCONS(node).left)) return 1; } while (IS_NOT_NULL(node = NCONS(node).right)); break; @@ -1982,7 +2119,7 @@ is_invalid_qualifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_qualifier_num(QualifierNode* qf) +popular_quantifier_num(QuantifierNode* qf) { if (qf->greedy) { if (qf->lower == 0) { @@ -2013,7 +2150,7 @@ enum ReduceType { RQ_AQ, /* to '*?' */ RQ_QQ, /* to '??' */ RQ_P_QQ, /* to '+)??' */ - RQ_PQ_Q, /* to '+?)?' */ + RQ_PQ_Q /* to '+?)?' */ }; static enum ReduceType ReduceTypeTable[6][6] = { @@ -2026,15 +2163,15 @@ static enum ReduceType ReduceTypeTable[6][6] = { }; extern void -onig_reduce_nested_qualifier(Node* pnode, Node* cnode) +onig_reduce_nested_quantifier(Node* pnode, Node* cnode) { int pnum, cnum; - QualifierNode *p, *c; + QuantifierNode *p, *c; - p = &(NQUALIFIER(pnode)); - c = &(NQUALIFIER(cnode)); - pnum = popular_qualifier_num(p); - cnum = popular_qualifier_num(c); + p = &(NQUANTIFIER(pnode)); + c = &(NQUANTIFIER(cnode)); + pnum = popular_quantifier_num(p); + cnum = popular_quantifier_num(c); switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: @@ -2125,6 +2262,10 @@ typedef struct { int ref1; int* refs; int by_name; +#ifdef USE_BACKREF_AT_LEVEL + int exist_level; + int level; /* \k */ +#endif } backref; struct { UChar* name; @@ -2138,7 +2279,7 @@ typedef struct { static int -fetch_range_qualifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) +fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env) { int low, up, syn_allow, non_low = 0; int r = 0; @@ -2274,15 +2415,17 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) control: if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL; PFETCH(c); - if (c == MC_ESC(enc)) { - v = fetch_escaped_value(&p, end, env); - if (v < 0) return v; - c = (OnigCodePoint )v; - } - else if (c == '?') + if (c == '?') { c = 0177; - else + 
} + else { + if (c == MC_ESC(enc)) { + v = fetch_escaped_value(&p, end, env); + if (v < 0) return v; + c = (OnigCodePoint )v; + } c &= 0x9f; + } break; } /* fall through */ @@ -2302,6 +2445,89 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env) static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env); #ifdef USE_NAMED_GROUP +#ifdef USE_BACKREF_AT_LEVEL +/* + \k, \k +*/ +static int +fetch_name_with_level(UChar** src, UChar* end, UChar** rname_end + , ScanEnv* env, int* level) +{ + int r, exist_level = 0; + OnigCodePoint c = 0; + OnigCodePoint first_code; + OnigEncoding enc = env->enc; + UChar *name_end; + UChar *p = *src; + PFETCH_READY; + + name_end = end; + r = 0; + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + first_code = c; + if (c == '>') + return ONIGERR_EMPTY_GROUP_NAME; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == '>' || c == ')' || c == '+' || c == '-') break; + + if (!ONIGENC_IS_CODE_WORD(enc, c)) { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + + if (c != '>') { + if (c == '+' || c == '-') { + int num; + int flag = (c == '-' ? -1 : 1); + + PFETCH(c); + if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + PUNFETCH; + num = onig_scan_unsigned_number(&p, end, enc); + if (num < 0) return ONIGERR_TOO_BIG_NUMBER; + *level = (num * flag); + exist_level = 1; + + PFETCH(c); + if (c == '>') + goto first_check; + } + + err: + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; + } + else { + first_check: + if (ONIGENC_IS_CODE_ASCII(first_code) && + ONIGENC_IS_CODE_UPPER(enc, first_code)) + r = ONIGERR_INVALID_GROUP_NAME; + } + + if (r == 0) { + *rname_end = name_end; + *src = p; + return (exist_level ? 
1 : 0); + } + else { + onig_scan_env_set_error_string(env, r, *src, name_end); + return r; + } +} +#endif /* USE_BACKREF_AT_LEVEL */ + /* def: 0 -> define name (don't allow number name) 1 -> reference name (allow number name) @@ -2428,11 +2654,11 @@ CC_ESC_WARN(ScanEnv* env, UChar *c) if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "character class has '%s' without escape", c); - (*onig_warn)(buf); + (UChar* )"character class has '%s' without escape", c); + (*onig_warn)((char* )buf); } } @@ -2442,11 +2668,11 @@ CCEND_ESC_WARN(ScanEnv* env, UChar* c) if (onig_warn == onig_null_warn) return ; if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { - char buf[WARN_BUFSIZE]; + UChar buf[WARN_BUFSIZE]; onig_snprintf_with_pattern(buf, WARN_BUFSIZE, (env)->enc, (env)->pattern, (env)->pattern_end, - "regular expression has '%s' without escape", c); - (*onig_warn)(buf); + (UChar* )"regular expression has '%s' without escape", c); + (*onig_warn)((char* )buf); } } @@ -2537,6 +2763,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_CHAR; tok->base = 0; tok->u.c = c; + tok->escaped = 0; + if (c == ']') { tok->type = TK_CC_CLOSE; } @@ -2708,7 +2936,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_CC_CC_OPEN; } else { - CC_ESC_WARN(env, "["); + CC_ESC_WARN(env, (UChar* )"["); } } } @@ -2747,7 +2975,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->backp = p; PFETCH(c); - if (c == MC_ESC(enc)) { + if (IS_MC_ESC_CODE(c, enc, syn)) { if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE; tok->backp = p; @@ -2804,7 +3032,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '{': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break; - r = fetch_range_qualifier(&p, end, tok, env); + r = fetch_range_quantifier(&p, end, tok, env); if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ @@ -3012,6 +3240,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.backref.num = 1; tok->u.backref.ref1 = num; tok->u.backref.by_name = 0; +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level = 0; +#endif break; } @@ -3050,8 +3281,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) int* backs; prev = p; + +#ifdef USE_BACKREF_AT_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. */ + r = fetch_name_with_level(&p, end, &name_end, env, &tok->u.backref.level); + if (r == 1) tok->u.backref.exist_level = 1; + else tok->u.backref.exist_level = 0; +#else r = fetch_name(&p, end, &name_end, env, 1); +#endif if (r < 0) return r; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); if (num <= 0) { onig_scan_env_set_error_string(env, @@ -3170,13 +3410,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) switch (c) { case '.': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break; +#ifdef USE_VARIABLE_META_CHARS any_char: +#endif tok->type = TK_ANYCHAR; break; case '*': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break; +#ifdef USE_VARIABLE_META_CHARS anytime: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = REPEAT_INFINITE; @@ -3185,7 +3429,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '+': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break; +#ifdef USE_VARIABLE_META_CHARS one_or_more_time: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 1; tok->u.repeat.upper = REPEAT_INFINITE; @@ -3194,7 +3440,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '?': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break; +#ifdef USE_VARIABLE_META_CHARS zero_or_one_time: +#endif tok->type = TK_OP_REPEAT; tok->u.repeat.lower = 0; tok->u.repeat.upper = 1; @@ -3203,7 +3451,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '{': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break; - r = fetch_range_qualifier(&p, end, tok, env); + r = fetch_range_quantifier(&p, end, tok, env); if (r < 0) return r; /* error */ if (r == 0) goto greedy_check; else if (r == 2) { /* {n} */ @@ -3261,7 +3509,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->option) - ? ANCHOR_END_BUF : ANCHOR_END_LINE); + ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); break; case '[': @@ -3271,7 +3519,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case ']': if (*src > env->pattern) /* /].../ is allowed. */ - CCEND_ESC_WARN(env, "]"); + CCEND_ESC_WARN(env, (UChar* )"]"); break; case '#': @@ -3297,14 +3545,16 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } } +#ifdef USE_VARIABLE_META_CHARS out: +#endif *src = p; return tok->type; } static int add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, - OnigCodePoint sbr[], OnigCodePoint mbr[]) + const OnigCodePoint sbr[], const OnigCodePoint mbr[]) { int i, r; OnigCodePoint j; @@ -3368,7 +3618,7 @@ static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { int c, r; - OnigCodePoint *sbr, *mbr; + const OnigCodePoint *sbr, *mbr; OnigEncoding enc = env->enc; r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr); @@ -3506,19 +3756,19 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) #define POSIX_BRACKET_NAME_MAX_LEN 6 static PosixBracketEntryType PBS[] = { - { "alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { "alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { "blank", 
ONIGENC_CTYPE_BLANK, 5 }, - { "cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { "digit", ONIGENC_CTYPE_DIGIT, 5 }, - { "graph", ONIGENC_CTYPE_GRAPH, 5 }, - { "lower", ONIGENC_CTYPE_LOWER, 5 }, - { "print", ONIGENC_CTYPE_PRINT, 5 }, - { "punct", ONIGENC_CTYPE_PUNCT, 5 }, - { "space", ONIGENC_CTYPE_SPACE, 5 }, - { "upper", ONIGENC_CTYPE_UPPER, 5 }, - { "xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { "ascii", ONIGENC_CTYPE_ASCII, 5 }, /* I don't know origin. Perl? */ + { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 }, { (UChar* )NULL, -1, 0 } }; @@ -3542,7 +3792,7 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env) for (pb = PBS; IS_NOT_NULL(pb->name); pb++) { if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) { p = (UChar* )onigenc_step(enc, p, end, pb->len); - if (onigenc_with_ascii_strncmp(enc, p, end, ":]", 2) != 0) + if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) return ONIGERR_INVALID_POSIX_BRACKET_TYPE; r = add_ctype_to_cc(cc, pb->ctype, not, env); @@ -3577,19 +3827,19 @@ static int property_name_to_ctype(UChar* p, UChar* end, OnigEncoding enc) { static PosixBracketEntryType PBS[] = { - { "Alnum", ONIGENC_CTYPE_ALNUM, 5 }, - { "Alpha", ONIGENC_CTYPE_ALPHA, 5 }, - { "Blank", ONIGENC_CTYPE_BLANK, 5 }, - { "Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, - { "Digit", ONIGENC_CTYPE_DIGIT, 5 }, - { "Graph", ONIGENC_CTYPE_GRAPH, 5 }, - { "Lower", ONIGENC_CTYPE_LOWER, 5 }, 
- { "Print", ONIGENC_CTYPE_PRINT, 5 }, - { "Punct", ONIGENC_CTYPE_PUNCT, 5 }, - { "Space", ONIGENC_CTYPE_SPACE, 5 }, - { "Upper", ONIGENC_CTYPE_UPPER, 5 }, - { "XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, - { "ASCII", ONIGENC_CTYPE_ASCII, 5 }, + { (UChar* )"Alnum", ONIGENC_CTYPE_ALNUM, 5 }, + { (UChar* )"Alpha", ONIGENC_CTYPE_ALPHA, 5 }, + { (UChar* )"Blank", ONIGENC_CTYPE_BLANK, 5 }, + { (UChar* )"Cntrl", ONIGENC_CTYPE_CNTRL, 5 }, + { (UChar* )"Digit", ONIGENC_CTYPE_DIGIT, 5 }, + { (UChar* )"Graph", ONIGENC_CTYPE_GRAPH, 5 }, + { (UChar* )"Lower", ONIGENC_CTYPE_LOWER, 5 }, + { (UChar* )"Print", ONIGENC_CTYPE_PRINT, 5 }, + { (UChar* )"Punct", ONIGENC_CTYPE_PUNCT, 5 }, + { (UChar* )"Space", ONIGENC_CTYPE_SPACE, 5 }, + { (UChar* )"Upper", ONIGENC_CTYPE_UPPER, 5 }, + { (UChar* )"XDigit", ONIGENC_CTYPE_XDIGIT, 6 }, + { (UChar* )"ASCII", ONIGENC_CTYPE_ASCII, 5 }, { (UChar* )NULL, -1, 0 } }; @@ -3839,7 +4089,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, *src, env->pattern_end, 1, env->enc)) return ONIGERR_EMPTY_CHAR_CLASS; - CC_ESC_WARN(env, "]"); + CC_ESC_WARN(env, (UChar* )"]"); r = tok->type = TK_CHAR; /* allow []...] */ } @@ -3942,7 +4192,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, r = parse_posix_bracket(cc, &p, end, env); if (r < 0) goto err; if (r == 1) { /* is not POSIX bracket */ - CC_ESC_WARN(env, "["); + CC_ESC_WARN(env, (UChar* )"["); p = tok->backp; v = (OnigCodePoint )tok->u.c; in_israw = 0; @@ -3988,7 +4238,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, goto val_entry; } else if (r == TK_CC_AND) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } state = CCS_RANGE; @@ -4003,12 +4253,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; /* [--x] or [a&&-x] is warned. 
*/ if (r == TK_CC_RANGE || and_start != 0) - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto val_entry; } else if (state == CCS_RANGE) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto sb_char; /* [!--x] is allowed */ } else { /* CCS_COMPLETE */ @@ -4017,12 +4267,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, fetched = 1; if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */ else if (r == TK_CC_AND) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { - CC_ESC_WARN(env, "-"); + CC_ESC_WARN(env, (UChar* )"-"); goto sb_char; /* [0-9-a] is allowed as [0-9\-a] */ } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; @@ -4326,10 +4576,9 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } else { -#ifdef USE_NAMED_GROUP if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; -#endif + *np = node_new_effect_memory(env->option, 0); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); num = scan_env_add_mem_entry(env); @@ -4358,20 +4607,20 @@ parse_effect(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return 0; } -static char* PopularQStr[] = { +static const char* PopularQStr[] = { "?", "*", "+", "??", "*?", "+?" }; -static char* ReduceQStr[] = { +static const char* ReduceQStr[] = { "", "", "*", "*?", "??", "+ and ??", "+? and ?" }; static int -set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) +set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { - QualifierNode* qn; + QuantifierNode* qn; - qn = &(NQUALIFIER(qnode)); + qn = &(NQUANTIFIER(qnode)); if (qn->lower == 1 && qn->upper == 1) { return 1; } @@ -4390,19 +4639,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) } break; - case N_QUALIFIER: + case N_QUANTIFIER: { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... 
but not warn (.?)? etc... */ - QualifierNode* qnt = &(NQUALIFIER(target)); + QuantifierNode* qnt = &(NQUANTIFIER(target)); + int nestq_num = popular_quantifier_num(qn); + int targetq_num = popular_quantifier_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (qn->by_number == 0 && qnt->by_number == 0 && + if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { - int nestq_num, targetq_num; - char buf[WARN_BUFSIZE]; - - nestq_num = popular_qualifier_num(qn); - targetq_num = popular_qualifier_num(qnt); + UChar buf[WARN_BUFSIZE]; switch(ReduceTypeTable[targetq_num][nestq_num]) { case RQ_ASIS: @@ -4411,9 +4658,9 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) case RQ_DEL: if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, - env->pattern, env->pattern_end, - "redundant nested repeat operator"); - (*onig_verb_warn)(buf); + env->pattern, env->pattern_end, + (UChar* )"redundant nested repeat operator"); + (*onig_verb_warn)((char* )buf); } goto warn_exit; break; @@ -4422,10 +4669,10 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) if (onig_verb_warn != onig_null_warn) { onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, env->pattern, env->pattern_end, - "nested repeat operator %s and %s was replaced with '%s'", + (UChar* )"nested repeat operator %s and %s was replaced with '%s'", PopularQStr[targetq_num], PopularQStr[nestq_num], ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); - (*onig_verb_warn)(buf); + (*onig_verb_warn)((char* )buf); } goto warn_exit; break; @@ -4434,9 +4681,17 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) warn_exit: #endif - if (popular_qualifier_num(qnt) >= 0 && popular_qualifier_num(qn) >= 0) { - onig_reduce_nested_qualifier(qnode, target); - goto q_exit; + if (targetq_num >= 0) { + if (nestq_num >= 0) { + 
onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } + else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ + if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { + qn->upper = (qn->lower == 0 ? 1 : qn->lower); + } + } } } break; @@ -4450,61 +4705,6 @@ set_qualifier(Node* qnode, Node* target, int group, ScanEnv* env) return 0; } -static int -make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc, - CClassNode* cc, Node** root) -{ - int r, i, j, k, clen, len, ncode, n; - UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; - Node **ptail, *snode = NULL_NODE; - OnigCompAmbigCodes* ccs; - OnigCompAmbigCodeItem* ci; - OnigAmbigType amb; - - n = 0; - *root = NULL_NODE; - ptail = root; - - - for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) { - if ((amb & ambig_flag) == 0) continue; - - ncode = ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, amb, &ccs); - for (i = 0; i < ncode; i++) { - if (onig_is_code_in_cc(enc, ccs[i].code, cc)) { - for (j = 0; j < ccs[i].n; j++) { - ci = &(ccs[i].items[j]); - if (ci->len > 1) { /* compound only */ - if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc); - - clen = ci->len; - for (k = 0; k < clen; k++) { - len = ONIGENC_CODE_TO_MBC(enc, ci->code[k], buf); - - if (k == 0) { - snode = node_new_str_raw(buf, buf + len); - CHECK_NULL_RETURN_VAL(snode, ONIGERR_MEMORY); - } - else { - r = onig_node_str_cat(snode, buf, buf + len); - if (r < 0) return r; - } - } - - *ptail = node_new_alt(snode, NULL_NODE); - CHECK_NULL_RETURN_VAL(*ptail, ONIGERR_MEMORY); - ptail = &(NCONS(*ptail).right); - n++; - } - } - } - } - } - - return n; -} - - #ifdef USE_SHARED_CCLASS_TABLE #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8 @@ -4546,27 +4746,9 @@ static int type_cclass_hash(type_cclass_key* key) return val + (val >> 5); } -static int type_cclass_key_free(st_data_t x) -{ - xfree((void* )x); - return 0; -} - -static st_data_t 
type_cclass_key_clone(st_data_t x) -{ - type_cclass_key* new_key; - type_cclass_key* key = (type_cclass_key* )x; - - new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); - *new_key = *key; - return (st_data_t )new_key; -} - static struct st_hash_type type_type_cclass_hash = { type_cclass_cmp, type_cclass_hash, - type_cclass_key_free, - type_cclass_key_clone }; static st_table* OnigTypeCClassTable; @@ -4580,14 +4762,18 @@ i_free_shared_class(type_cclass_key* key, Node* node, void* arg) if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf); xfree(node); } + + if (IS_NOT_NULL(key)) xfree(key); return ST_DELETE; } extern int -onig_free_shared_cclass_table() +onig_free_shared_cclass_table(void) { if (IS_NOT_NULL(OnigTypeCClassTable)) { onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0); + onig_st_free_table(OnigTypeCClassTable); + OnigTypeCClassTable = NULL; } return 0; @@ -4667,23 +4853,36 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_RAW_BYTE: tk_raw_byte: { - *np = node_new_str_raw_char((UChar )tok->u.c); + *np = node_new_str_char((UChar )tok->u.c); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); len = 1; while (1) { + if (len >= ONIGENC_MBC_MINLEN(env->enc)) { + if (len == enc_len(env->enc, NSTRING(*np).s)) { + r = fetch_token(tok, src, end, env); + goto string_end; + } + } + r = fetch_token(tok, src, end, env); if (r < 0) return r; if (r != TK_RAW_BYTE) { -#ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG - if (len >= enc_len(env->enc, NSTRING(*np).s)) { - NSTRING_CLEAR_RAW(*np); +#ifdef USE_PAD_TO_SHORT_BYTE_CHAR + int rem; + if (len < ONIGENC_MBC_MINLEN(env->enc)) { + rem = ONIGENC_MBC_MINLEN(env->enc) - len; + (void )node_str_head_pad(&NSTRING(*np), rem, (UChar )0); + if (len + rem == enc_len(env->enc, NSTRING(*np).s)) { + goto string_end; + } } #endif - goto string_end; + return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; } r = node_str_cat_char(*np, (UChar )tok->u.c); if (r < 0) return r; + len++; } } @@ -4741,7 +4940,7 @@ parse_exp(Node** np, OnigToken* tok, 
int term, int ctype, not; #ifdef USE_SHARED_CCLASS_TABLE - OnigCodePoint *sbr, *mbr; + const OnigCodePoint *sbr, *mbr; ctype = parse_ctype_to_enc_ctype(tok->u.subtype, ¬); r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr); @@ -4823,7 +5022,7 @@ parse_exp(Node** np, OnigToken* tok, int term, if (IS_IGNORECASE(env->option)) { int i, n, in_cc; - OnigPairAmbigCodes* ccs; + const OnigPairAmbigCodes* ccs; BitSetRef bs = cc->bs; OnigAmbigType amb; @@ -4854,24 +5053,6 @@ parse_exp(Node** np, OnigToken* tok, int term, } } } - - if (IS_IGNORECASE(env->option) && - (env->ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) { - int res; - Node *alt_root, *work; - - res = make_compound_alt_node_from_cc(env->ambig_flag, env->enc, - cc, &alt_root); - if (res < 0) return res; - if (res > 0) { - work = node_new_alt(*np, alt_root); - if (IS_NULL(work)) { - onig_node_free(alt_root); - return ONIGERR_MEMORY; - } - *np = work; - } - } } break; @@ -4883,17 +5064,22 @@ parse_exp(Node** np, OnigToken* tok, int term, case TK_ANYCHAR_ANYTIME: *np = node_new_anychar(); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); - qn = node_new_qualifier(0, REPEAT_INFINITE, 0); + qn = node_new_quantifier(0, REPEAT_INFINITE, 0); CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUALIFIER(qn).target = *np; + NQUANTIFIER(qn).target = *np; *np = qn; break; case TK_BACKREF: len = tok->u.backref.num; *np = node_new_backref(len, - (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), - tok->u.backref.by_name, env); + (len > 1 ? 
tok->u.backref.refs : &(tok->u.backref.ref1)), + tok->u.backref.by_name, +#ifdef USE_BACKREF_AT_LEVEL + tok->u.backref.exist_level, + tok->u.backref.level, +#endif + env); CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY); break; @@ -4936,14 +5122,14 @@ parse_exp(Node** np, OnigToken* tok, int term, repeat: if (r == TK_OP_REPEAT || r == TK_INTERVAL) { - if (is_invalid_qualifier_target(*targetp)) + if (is_invalid_quantifier_target(*targetp)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; - qn = node_new_qualifier(tok->u.repeat.lower, tok->u.repeat.upper, + qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, (r == TK_INTERVAL ? 1 : 0)); CHECK_NULL_RETURN_VAL(qn, ONIGERR_MEMORY); - NQUALIFIER(qn).greedy = tok->u.repeat.greedy; - r = set_qualifier(qn, *targetp, group, env); + NQUANTIFIER(qn).greedy = tok->u.repeat.greedy; + r = set_quantifier(qn, *targetp, group, env); if (r < 0) return r; if (tok->u.repeat.possessive != 0) { diff --git a/ext/mbstring/oniguruma/regparse.h b/ext/mbstring/oniguruma/regparse.h index 1a4ac7dea2..b25618a33f 100644 --- a/ext/mbstring/oniguruma/regparse.h +++ b/ext/mbstring/oniguruma/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -37,7 +37,7 @@ #define N_CTYPE (1<< 2) #define N_ANYCHAR (1<< 3) #define N_BACKREF (1<< 4) -#define N_QUALIFIER (1<< 5) +#define N_QUANTIFIER (1<< 5) #define N_EFFECT (1<< 6) #define N_ANCHOR (1<< 7) #define N_LIST (1<< 8) @@ -52,7 +52,7 @@ #define NSTRING(node) ((node)->u.str) #define NCCLASS(node) ((node)->u.cclass) #define NCTYPE(node) ((node)->u.ctype) -#define NQUALIFIER(node) ((node)->u.qualifier) +#define NQUANTIFIER(node) ((node)->u.quantifier) #define NANCHOR(node) ((node)->u.anchor) #define NBACKREF(node) ((node)->u.backref) #define NEFFECT(node) ((node)->u.effect) @@ -67,7 +67,7 @@ #define CTYPE_XDIGIT (1<<6) #define CTYPE_NOT_XDIGIT (1<<7) -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_PL) +#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) #define EFFECT_MEMORY (1<<0) @@ -76,7 +76,7 @@ #define NODE_STR_MARGIN 16 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ -#define NODE_BACKREFS_SIZE 7 +#define NODE_BACKREFS_SIZE 6 #define NSTR_RAW (1<<0) /* by backslashed number */ #define NSTR_AMBIG (1<<1) @@ -124,12 +124,14 @@ typedef struct { int lower; int upper; int greedy; - int by_number; /* {n,m} */ int target_empty_info; struct _Node* head_exact; struct _Node* next_head_exact; int is_refered; /* include called node. don't eliminate even if {0} */ -} QualifierNode; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ +#endif +} QuantifierNode; /* status bits */ #define NST_MIN_FIXED (1<<0) @@ -145,6 +147,8 @@ typedef struct { #define NST_NAMED_GROUP (1<<10) #define NST_NAME_REF (1<<11) #define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. 
*/ +#define NST_NEST_LEVEL (1<<13) +#define NST_BY_NUMBER (1<<14) /* {n,m} */ #define SET_EFFECT_STATUS(node,f) (node)->u.effect.state |= (f) #define CLEAR_EFFECT_STATUS(node,f) (node)->u.effect.state &= ~(f) @@ -165,7 +169,9 @@ typedef struct { #define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) #define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) #define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) -#define IS_QUALIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) +#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) +#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) typedef struct { int state; @@ -212,6 +218,7 @@ typedef struct { int back_num; int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; + int nest_level; } BackrefNode; typedef struct { @@ -223,15 +230,15 @@ typedef struct { typedef struct _Node { int type; union { - StrNode str; - CClassNode cclass; - QualifierNode qualifier; - EffectNode effect; + StrNode str; + CClassNode cclass; + QuantifierNode quantifier; + EffectNode effect; #ifdef USE_SUBEXP_CALL - CallNode call; + CallNode call; #endif - BackrefNode backref; - AnchorNode anchor; + BackrefNode backref; + AnchorNode anchor; struct { struct _Node* left; struct _Node* right; @@ -274,6 +281,12 @@ typedef struct { int mem_alloc; Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; Node** mem_nodes_dynamic; +#ifdef USE_COMBINATION_EXPLOSION_CHECK + int num_comb_exp_check; + int comb_exp_max_regnum; + int curr_max_regnum; + int has_recursion; +#endif } ScanEnv; @@ -290,11 +303,10 @@ typedef struct { extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); #endif -extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); extern void onig_scan_env_set_error_string P_((ScanEnv* 
env, int ecode, UChar* arg, UChar* arg_end)); extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); -extern void onig_reduce_nested_qualifier P_((Node* pnode, Node* cnode)); +extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); extern void onig_node_conv_to_str_node P_((Node* node, int raw)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); @@ -303,7 +315,7 @@ extern Node* onig_node_new_anchor P_((int type)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); extern void onig_node_str_clear P_((Node* node)); -extern int onig_free_node_list(); +extern int onig_free_node_list P_((void)); extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); diff --git a/ext/mbstring/oniguruma/regposix.c b/ext/mbstring/oniguruma/regposix.c index 34cbeb9a46..a3bacf722e 100644 --- a/ext/mbstring/oniguruma/regposix.c +++ b/ext/mbstring/oniguruma/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -55,7 +55,7 @@ typedef struct { static int onig2posix_error_code(int code) { - static O2PERR o2p[] = { + static const O2PERR o2p[] = { { ONIG_MISMATCH, REG_NOMATCH }, { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, { ONIGERR_MEMORY, REG_ESPACE }, @@ -192,7 +192,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch, ENC_STRING_LEN(ONIG_C(reg)->enc, str, len); end = (UChar* )(str + len); r = onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end, - (OnigRegion* )pmatch, options); + (OnigRegion* )pm, options); if (r >= 0) { r = 0; /* Match */ @@ -212,6 +212,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch, if (pm != pmatch && pm != NULL) xfree(pm); +#if 0 + if (reg->re_nsub > nmatch - 1) + reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1); +#endif + return r; } diff --git a/ext/mbstring/oniguruma/regsyntax.c b/ext/mbstring/oniguruma/regsyntax.c index a0f36b8c33..9114e39e6b 100644 --- a/ext/mbstring/oniguruma/regsyntax.c +++ b/ext/mbstring/oniguruma/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2004 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,13 @@ #include "regint.h" +OnigSyntaxType OnigSyntaxASIS = { + 0 + , ONIG_SYN_OP2_INEFFECTIVE_ESCAPE + , 0 + , ONIG_OPTION_NONE +}; + OnigSyntaxType OnigSyntaxPosixBasic = { ( SYN_POSIX_COMMON_OP | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_BRACE_INTERVAL ) @@ -63,7 +70,7 @@ OnigSyntaxType OnigSyntaxEmacs = { OnigSyntaxType OnigSyntaxGrep = { ( ONIG_SYN_OP_DOT_ANYCHAR | ONIG_SYN_OP_BRACKET_CC | ONIG_SYN_OP_POSIX_BRACKET | - ONIG_SYN_OP_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | + ONIG_SYN_OP_ESC_BRACE_INTERVAL | ONIG_SYN_OP_ESC_LPAREN_SUBEXP | ONIG_SYN_OP_ESC_VBAR_ALT | ONIG_SYN_OP_ASTERISK_ZERO_INF | ONIG_SYN_OP_ESC_PLUS_ONE_INF | ONIG_SYN_OP_ESC_QMARK_ZERO_ONE | ONIG_SYN_OP_LINE_ANCHOR | @@ -110,6 +117,28 @@ OnigSyntaxType OnigSyntaxPerl = { , ONIG_OPTION_SINGLELINE }; +/* Perl + named group */ +OnigSyntaxType OnigSyntaxPerl_NG = { + (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | + ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | + ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS | + ONIG_SYN_OP_ESC_C_CONTROL ) + & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) + , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | + ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS | + ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | + ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) + , ( SYN_GNU_REGEX_BV | + ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | + ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) + , ONIG_OPTION_SINGLELINE +}; + + extern int onig_set_default_syntax(OnigSyntaxType* syntax) diff --git a/ext/mbstring/oniguruma/regversion.c b/ext/mbstring/oniguruma/regversion.c index 5f15c10e65..5fad0cc18c 100644 --- a/ext/mbstring/oniguruma/regversion.c +++ b/ext/mbstring/oniguruma/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression 
library) **********************************************************************/ /*- - * Copyright (c) 2002-2005 K.Kosako + * Copyright (c) 2002-2006 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -47,7 +47,7 @@ onig_copyright(void) { static char s[58]; - sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2005 K.Kosako", + sprintf(s, "Oniguruma %d.%d.%d : Copyright (C) 2002-2006 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); diff --git a/ext/mbstring/oniguruma/st.c b/ext/mbstring/oniguruma/st.c index 65c2cc58bd..2324da2635 100644 --- a/ext/mbstring/oniguruma/st.c +++ b/ext/mbstring/oniguruma/st.c @@ -56,8 +56,6 @@ static int numhash(long); static struct st_hash_type type_numhash = { numcmp, numhash, - st_nothing_key_free, - st_nothing_key_clone }; /* extern int strcmp(const char *, const char *); */ @@ -65,20 +63,6 @@ static int strhash(const char *); static struct st_hash_type type_strhash = { strcmp, strhash, - st_nothing_key_free, - st_nothing_key_clone -}; - -static int strend_cmp(st_strend_key*, st_strend_key*); -static int strend_hash(st_strend_key*); -static int strend_key_free(st_data_t key); -static st_data_t strend_key_clone(st_data_t x); - -static struct st_hash_type type_strend_hash = { - strend_cmp, - strend_hash, - strend_key_free, - strend_key_clone }; static void rehash(st_table *); @@ -100,7 +84,7 @@ static void rehash(st_table *); /* Table of prime numbers 2^n+a, 2<=n<=30. 
*/ -static long primes[] = { +static const long primes[] = { 8 + 3, 16 + 3, 32 + 5, @@ -228,13 +212,6 @@ st_init_strtable_with_size(size) return st_init_table_with_size(&type_strhash, size); } -st_table* -st_init_strend_table_with_size(size) - int size; -{ - return st_init_table_with_size(&type_strend_hash, size); -} - void st_free_table(table) st_table *table; @@ -246,7 +223,6 @@ st_free_table(table) ptr = table->bins[i]; while (ptr != 0) { next = ptr->next; - table->type->key_free(ptr->key); free(ptr); ptr = next; } @@ -297,21 +273,6 @@ st_lookup(table, key, value) } } -int -st_lookup_strend(table, str_key, end_key, value) - st_table *table; - const unsigned char* str_key; - const unsigned char* end_key; - st_data_t *value; -{ - st_strend_key key; - - key.s = (unsigned char* )str_key; - key.end = (unsigned char* )end_key; - - return st_lookup(table, (st_data_t )(&key), value); -} - #define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ do {\ st_table_entry *entry;\ @@ -352,22 +313,6 @@ st_insert(table, key, value) } } -int -st_insert_strend(table, str_key, end_key, value) - st_table *table; - const unsigned char* str_key; - const unsigned char* end_key; - st_data_t value; -{ - st_strend_key* key; - - key = alloc(st_strend_key); - key->s = (unsigned char* )str_key; - key->end = (unsigned char* )end_key; - - return st_insert(table, (st_data_t )key, value); -} - void st_add_direct(table, key, value) st_table *table; @@ -381,21 +326,6 @@ st_add_direct(table, key, value) ADD_DIRECT(table, key, value, hash_val, bin_pos); } -void -st_add_direct_strend(table, str_key, end_key, value) - st_table *table; - const unsigned char* str_key; - const unsigned char* end_key; - st_data_t value; -{ - st_strend_key* key; - - key = alloc(st_strend_key); - key->s = (unsigned char* )str_key; - key->end = (unsigned char* )end_key; - st_add_direct(table, (st_data_t )key, value); -} - static void rehash(table) register st_table *table; @@ -455,7 +385,6 @@ st_copy(old_table) return 0; } 
*entry = *ptr; - entry->key = old_table->type->key_clone(ptr->key); entry->next = new_table->bins[i]; new_table->bins[i] = entry; ptr = ptr->next; @@ -556,7 +485,7 @@ st_cleanup_safe(table, never) table->num_entries = num_entries; } -void +int st_foreach(table, func, arg) st_table *table; int (*func)(); @@ -569,7 +498,7 @@ st_foreach(table, func, arg) for(i = 0; i < table->num_bins; i++) { last = 0; for(ptr = table->bins[i]; ptr != 0;) { - retval = (*func)(ptr->key, ptr->record, arg, 0); + retval = (*func)(ptr->key, ptr->record, arg); switch (retval) { case ST_CHECK: /* check if hash is modified during iteration */ tmp = 0; @@ -580,8 +509,7 @@ st_foreach(table, func, arg) } if (!tmp) { /* call func with error notice */ - retval = (*func)(0, 0, arg, 1); - return; + return 1; } /* fall through */ case ST_CONTINUE: @@ -589,7 +517,7 @@ st_foreach(table, func, arg) ptr = ptr->next; break; case ST_STOP: - return; + return 0; case ST_DELETE: tmp = ptr; if (last == 0) { @@ -599,12 +527,12 @@ st_foreach(table, func, arg) last->next = ptr->next; } ptr = ptr->next; - table->type->key_free(tmp->key); free(tmp); table->num_entries--; } } } + return 0; } static int @@ -659,59 +587,3 @@ numhash(n) { return n; } - -extern int -st_nothing_key_free(st_data_t key) { return 0; } - -extern st_data_t -st_nothing_key_clone(st_data_t x) { return x; } - -static int strend_cmp(st_strend_key* x, st_strend_key* y) -{ - unsigned char *p, *q; - int c; - - if ((x->end - x->s) != (y->end - y->s)) - return 1; - - p = x->s; - q = y->s; - while (p < x->end) { - c = (int )*p - (int )*q; - if (c != 0) return c; - - p++; q++; - } - - return 0; -} - -static int strend_hash(st_strend_key* x) -{ - int val; - unsigned char *p; - - val = 0; - p = x->s; - while (p < x->end) { - val = val * 997 + (int )*p++; - } - - return val + (val >> 5); -} - -static int strend_key_free(st_data_t x) -{ - xfree((void* )x); - return 0; -} - -static st_data_t strend_key_clone(st_data_t x) -{ - st_strend_key* new_key; - 
st_strend_key* key = (st_strend_key* )x; - - new_key = alloc(st_strend_key); - *new_key = *key; - return (st_data_t )new_key; -} diff --git a/ext/mbstring/oniguruma/st.h b/ext/mbstring/oniguruma/st.h index c5cc4e625e..da65e7fef8 100644 --- a/ext/mbstring/oniguruma/st.h +++ b/ext/mbstring/oniguruma/st.h @@ -14,8 +14,6 @@ typedef struct st_table st_table; struct st_hash_type { int (*compare)(); int (*hash)(); - int (*key_free)(); - st_data_t (*key_clone)(); }; struct st_table { @@ -25,11 +23,6 @@ struct st_table { struct st_table_entry **bins; }; -typedef struct { - unsigned char* s; - unsigned char* end; -} st_strend_key; - #define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0) enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; @@ -51,23 +44,16 @@ st_table *st_init_numtable _((void)); st_table *st_init_numtable_with_size _((int)); st_table *st_init_strtable _((void)); st_table *st_init_strtable_with_size _((int)); -st_table *st_init_strend_table_with_size _((int)); int st_delete _((st_table *, st_data_t *, st_data_t *)); int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t)); int st_insert _((st_table *, st_data_t, st_data_t)); -int st_insert_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t)); int st_lookup _((st_table *, st_data_t, st_data_t *)); -int st_lookup_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t*)); -void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); +int st_foreach _((st_table *, int (*)(ANYARGS), st_data_t)); void st_add_direct _((st_table *, st_data_t, st_data_t)); -void st_add_direct_strend _((st_table *, const unsigned char*, const unsigned char*, st_data_t)); void st_free_table _((st_table *)); void st_cleanup_safe _((st_table *, st_data_t)); st_table *st_copy _((st_table *)); -extern st_data_t st_nothing_key_clone _((st_data_t key)); -extern int st_nothing_key_free _((st_data_t key)); - #define ST_NUMCMP ((int (*)()) 0) #define ST_NUMHASH 
((int (*)()) -2) diff --git a/ext/mbstring/php_mbregex.c b/ext/mbstring/php_mbregex.c index d868ab3843..67efd96acb 100644 --- a/ext/mbstring/php_mbregex.c +++ b/ext/mbstring/php_mbregex.c @@ -718,6 +718,13 @@ static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase) convert_to_string_ex(arg_pattern); /* don't bother doing an extended regex with just a number */ } + + if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern"); + RETVAL_FALSE; + goto out; + } + re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC); if (re == NULL) { RETVAL_FALSE; diff --git a/ext/mbstring/php_unicode.c b/ext/mbstring/php_unicode.c index 406069576e..2abad1330b 100644 --- a/ext/mbstring/php_unicode.c +++ b/ext/mbstring/php_unicode.c @@ -98,7 +98,6 @@ static int prop_lookup(unsigned long code, unsigned long n) } - MBSTRING_API int php_unicode_is_prop(unsigned long code, unsigned long mask1, unsigned long mask2) { @@ -277,6 +276,11 @@ MBSTRING_API char *php_unicode_convert_case(int case_mode, const char *srcstr, s size_t i; enum mbfl_no_encoding _src_encoding = mbfl_name2no_encoding(src_encoding); + if (_src_encoding == mbfl_no_encoding_invalid) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", src_encoding); + return NULL; + } + unicode = php_mb_convert_encoding(srcstr, srclen, "UCS-4BE", src_encoding, &unicode_len TSRMLS_CC); if (unicode == NULL) return NULL; diff --git a/ext/mbstring/tests/bug43994.phpt b/ext/mbstring/tests/bug43994.phpt index 9484a47849..39a39f6425 100644 --- a/ext/mbstring/tests/bug43994.phpt +++ b/ext/mbstring/tests/bug43994.phpt @@ -39,83 +39,91 @@ foreach($inputs as $input) { ?> --EXPECTF---- + -- Iteration 1 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - 
bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 2 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 3 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 4 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 5 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 6 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 7 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL -- Iteration 8 -- Without $regs arg: -int(1) + +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) With $regs arg: -int(1) -array(1) { - [0]=> - bool(false) -} +Warning: mb_ereg(): empty pattern in %s on line %d +bool(false) +NULL diff --git a/ext/mbstring/tests/bug43998.phpt b/ext/mbstring/tests/bug43998.phpt index 87b354b49e..da5e599da0 100644 --- a/ext/mbstring/tests/bug43998.phpt +++ 
b/ext/mbstring/tests/bug43998.phpt @@ -29,55 +29,43 @@ foreach($inputs as $input) { }; ?> --EXPECTF-- --- Iteration 1 -- -Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d +-- Iteration 1 -- Warning: mb_strtolower(): Unknown encoding "12345" in %s on line %d bool(false) -Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d - Warning: mb_strtoupper(): Unknown encoding "12345" in %s on line %d bool(false) -- Iteration 2 -- -Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d - Warning: mb_strtolower(): Unknown encoding "1.23456789E-9" in %s on line %d bool(false) -Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d - Warning: mb_strtoupper(): Unknown encoding "1.23456789E-9" in %s on line %d bool(false) -- Iteration 3 -- -Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d - Warning: mb_strtolower(): Unknown encoding "1" in %s on line %d bool(false) -Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d - Warning: mb_strtoupper(): Unknown encoding "1" in %s on line %d bool(false) -- Iteration 4 -- -Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d -string(12) "hello, world" +Warning: mb_strtolower(): Unknown encoding "" in %s on line %d +bool(false) -Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d -string(12) "HELLO, WORLD" +Warning: mb_strtoupper(): Unknown encoding "" in %s on line %d +bool(false) -- Iteration 5 -- -Warning: mb_strtolower(): Illegal character encoding specified in %s on line %d -string(12) "hello, world" - -Warning: mb_strtoupper(): Illegal character encoding specified in %s on line %d -string(12) "HELLO, WORLD" +Warning: mb_strtolower(): Unknown encoding "" in %s on line %d +bool(false) +Warning: mb_strtoupper(): Unknown encoding "" in %s on line %d +bool(false) diff --git a/ext/mbstring/tests/mb_strstr.phpt 
b/ext/mbstring/tests/mb_strstr.phpt new file mode 100644 index 0000000000..fcf9e85bc2 --- /dev/null +++ b/ext/mbstring/tests/mb_strstr.phpt @@ -0,0 +1,35 @@ +--TEST-- +mb_strstr() +--SKIPIF-- + +--INI-- +mbstring.internal_encoding=UTF-8 +--FILE-- + +--EXPECT-- +string(18) "おかきくけこ" +string(18) "おかきくけこ" +string(12) "あいうえ" +string(18) "おかきくけこ" +string(12) "あいうえ" +string(18) "おかきくけこ" +string(18) "おかきくけこ" +string(12) "あいうえ" -- 2.50.1