From 8ae17615d7177006358696e2a6ba0f10fd5f2391 Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Wed, 18 Mar 2009 17:44:25 +0000 Subject: [PATCH] - Add support for CP850 encoding (patch by Denis Giffeler) --- NEWS | 3 + Zend/zend_multibyte.c | 10 ++ ext/mbstring/config.m4 | 1 + ext/mbstring/libmbfl/filters/Makefile.am | 3 + ext/mbstring/libmbfl/filters/Makefile.bcc32 | 3 +- ext/mbstring/libmbfl/filters/mbfilter_cp850.c | 147 ++++++++++++++++++ ext/mbstring/libmbfl/filters/mbfilter_cp850.h | 37 +++++ .../libmbfl/filters/unicode_table_cp850.h | 52 +++++++ ext/mbstring/libmbfl/mbfl/mbfl_consts.h | 1 + ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 3 + ext/mbstring/libmbfl/mbfl/mbfl_encoding.c | 2 + ext/mbstring/libmbfl/mbfl/mbfl_encoding.h | 1 + ext/mbstring/libmbfl/mbfl/mbfl_ident.c | 2 + ext/mbstring/mbstring.dsp | 12 ++ 14 files changed, 276 insertions(+), 1 deletion(-) create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp850.c create mode 100644 ext/mbstring/libmbfl/filters/mbfilter_cp850.h create mode 100644 ext/mbstring/libmbfl/filters/unicode_table_cp850.h diff --git a/NEWS b/NEWS index 790ff0a501..35afa63d87 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,9 @@ PHP NEWS - Added LIBXML_LOADED_VERSION constant (libxml2 version currently used). (Rob) - Added JSON_FORCE_OBJECT flag to json_encode(). (Scott, Richard Quadling) +- Added support for CP850 encoding in mbstring extension. + (Denis Giffeler, Moriyoshi) + - Fixed bug #47678 (Allow loadExtension to be disabled in SQLite3). (Scott) - Fixed bug #47664 (get_class returns NULL instead of FALSE). (Dmitry) - Fixed bug #47593 (interface_exists() returns false when using absolute diff --git a/Zend/zend_multibyte.c b/Zend/zend_multibyte.c index a057c6995a..2f39c172ef 100644 --- a/Zend/zend_multibyte.c +++ b/Zend/zend_multibyte.c @@ -453,6 +453,15 @@ static zend_encoding encoding_armscii8 = { 1 }; +static const char *cp850_aliases[] = {"IBM850", NULL}; +static zend_encoding encoding_cp850 = { + NULL, + NULL, + "CP850", + (const char *(*)[])&cp850_aliases, + 1 +}; + static zend_encoding *zend_encoding_table[] = { &encoding_ucs4, &encoding_ucs4be, @@ -501,6 +510,7 @@ static zend_encoding *zend_encoding_table[] = { &encoding_koi8u, &encoding_armscii8, &encoding_cp1254, + &encoding_cp850, NULL }; diff --git a/ext/mbstring/config.m4 b/ext/mbstring/config.m4 index 0336121c20..6c79ba3973 100644 --- a/ext/mbstring/config.m4 +++ b/ext/mbstring/config.m4 @@ -271,6 +271,7 @@ AC_DEFUN([PHP_MBSTRING_SETUP_LIBMBFL], [ libmbfl/filters/mbfilter_utf8.c libmbfl/filters/mbfilter_uuencode.c libmbfl/filters/mbfilter_koi8u.c + libmbfl/filters/mbfilter_cp850.c libmbfl/mbfl/mbfilter.c libmbfl/mbfl/mbfilter_8bit.c libmbfl/mbfl/mbfilter_pass.c diff --git a/ext/mbstring/libmbfl/filters/Makefile.am b/ext/mbstring/libmbfl/filters/Makefile.am index 8fa046f923..802af4e61d 100644 --- a/ext/mbstring/libmbfl/filters/Makefile.am +++ b/ext/mbstring/libmbfl/filters/Makefile.am @@ -52,6 +52,7 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ mbfilter_koi8r.c \ mbfilter_koi8u.c \ mbfilter_armscii8.c \ + mbfilter_cp850.c \ html_entities.c \ cp932_table.h \ html_entities.h \ @@ -105,6 +106,7 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ mbfilter_utf8.h \ mbfilter_uuencode.h \ mbfilter_cp51932.h \ + mbfilter_cp850.h \ unicode_prop.h \ unicode_table_big5.h \ unicode_table_cns11643.h \ @@ -131,6 +133,7 @@ libmbfl_filters_la_SOURCES=mbfilter_cp936.c \ unicode_table_koi8r.h \ unicode_table_koi8u.h \ unicode_table_armscii8.h \ + unicode_table_cp850.h \ unicode_table_uhc.h mbfilter_iso8859_2.c: unicode_table_iso8859_2.h diff --git a/ext/mbstring/libmbfl/filters/Makefile.bcc32 b/ext/mbstring/libmbfl/filters/Makefile.bcc32 index 302436f11a..841c09632a 100644 --- a/ext/mbstring/libmbfl/filters/Makefile.bcc32 +++ b/ext/mbstring/libmbfl/filters/Makefile.bcc32 @@ -49,7 +49,8 @@ OBJS=mbfilter_cp936.obj \ mbfilter_koi8r.obj \ mbfilter_koi8u.obj \ html_entities.obj \ - mbfilter_armscii8.obj + mbfilter_armscii8.obj \ + mbfilter_cp850.obj all: $(OBJS) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.c b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c new file mode 100644 index 0000000000..5388c048b1 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.c @@ -0,0 +1,147 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa + * Adaption for CP850: D. Giffeler + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "mbfilter.h" +#include "mbfilter_cp850.h" +#include "unicode_table_cp850.h" + +static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter); + +static const char *mbfl_encoding_cp850_aliases[] = {"CP850", "CP-850", "IBM-850", NULL}; + +const mbfl_encoding mbfl_encoding_cp850 = { + mbfl_no_encoding_cp850, + "CP850", + "CP850", + (const char *(*)[])&mbfl_encoding_cp850_aliases, + NULL, + MBFL_ENCTYPE_SBCS +}; + +const struct mbfl_identify_vtbl vtbl_identify_cp850 = { + mbfl_no_encoding_cp850, + mbfl_filt_ident_common_ctor, + mbfl_filt_ident_common_dtor, + mbfl_filt_ident_cp850 +}; + +const struct mbfl_convert_vtbl vtbl_wchar_cp850 = { + mbfl_no_encoding_wchar, + mbfl_no_encoding_cp850, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_wchar_cp850, + mbfl_filt_conv_common_flush +}; + +const struct mbfl_convert_vtbl vtbl_cp850_wchar = { + mbfl_no_encoding_cp850, + mbfl_no_encoding_wchar, + mbfl_filt_conv_common_ctor, + mbfl_filt_conv_common_dtor, + mbfl_filt_conv_cp850_wchar, + mbfl_filt_conv_common_flush +}; + +#define CK(statement) do { if ((statement) < 0) return (-1); } while (0) + +/* + * cp850 => wchar + */ +int +mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter) +{ + int s; + + if (c >= 0 && c < cp850_ucs_table_min) { + s = c; + } else if (c >= cp850_ucs_table_min && c < 0x100) { + s = cp850_ucs_table[c - cp850_ucs_table_min]; + if (s <= 0) { + s = c; + s &= MBFL_WCSPLANE_MASK; + s |= MBFL_WCSPLANE_CP850; + } + } else { + s = c; + s &= MBFL_WCSGROUP_MASK; + s |= MBFL_WCSGROUP_THROUGH; + } + + CK((*filter->output_function)(s, filter->data)); + + return c; +} + +/* + * wchar => cp850 + */ +int +mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter) +{ + int s, n; + + if (c < 0x80) { + s = c; + } else { + s = -1; + n = cp850_ucs_table_len-1; + while (n >= 0) { + if (c == cp850_ucs_table[n]) { + s = cp850_ucs_table_min + n; + break; + } + n--; + } + if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP850) { + s = c & MBFL_WCSPLANE_MASK; + } + } + + if (s >= 0) { + CK((*filter->output_function)(s, filter->data)); + } else { + if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) { + CK(mbfl_filt_conv_illegal_output(c, filter)); + } + } + + return c; +} + +static int mbfl_filt_ident_cp850(int c, mbfl_identify_filter *filter) +{ + if (c >= 0x80 && c < 0xff) + filter->flag = 0; + else + filter->flag = 1; /* not it */ + return c; +} + + diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp850.h b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h new file mode 100644 index 0000000000..a1e0f9c0c2 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/mbfilter_cp850.h @@ -0,0 +1,37 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa + * Adaption for CP850: D. Giffeler + * + */ + +#ifndef MBFL_MBFILTER_CP850_H +#define MBFL_MBFILTER_CP850_H + +extern const mbfl_encoding mbfl_encoding_cp850; +extern const struct mbfl_identify_vtbl vtbl_identify_cp850; +extern const struct mbfl_convert_vtbl vtbl_wchar_cp850; +extern const struct mbfl_convert_vtbl vtbl_cp850_wchar; + +int mbfl_filt_conv_cp850_wchar(int c, mbfl_convert_filter *filter); +int mbfl_filt_conv_wchar_cp850(int c, mbfl_convert_filter *filter); + +#endif /* MBFL_MBFILTER_CP850_H */ diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp850.h b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h new file mode 100644 index 0000000000..6c60ae03e0 --- /dev/null +++ b/ext/mbstring/libmbfl/filters/unicode_table_cp850.h @@ -0,0 +1,52 @@ +/* + * "streamable kanji code filter and converter" + * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved. + * + * LICENSE NOTICES + * + * This file is part of "streamable kanji code filter and converter", + * which is distributed under the terms of GNU Lesser General Public + * License (version 2) as published by the Free Software Foundation. + * + * This software is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with "streamable kanji code filter and converter"; + * if not, write to the Free Software Foundation, Inc., 59 Temple Place, + * Suite 330, Boston, MA 02111-1307 USA + * + * The author of this part: Den V. Tsopa + * Adaption for CP850: D. Giffeler + * + */ + +#ifndef UNICODE_TABLE_CP850_H +#define UNICODE_TABLE_CP850_H + +/* cp850_DOS to Unicode table */ +static const unsigned short cp850_ucs_table[] = { + 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7 +, 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5 +, 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9 +, 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192 +, 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba +, 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb +, 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0 +, 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510 +, 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3 +, 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4 +, 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce +, 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580 +, 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe +, 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4 +, 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8 +, 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0 +}; +static const int cp850_ucs_table_min = 0x80; +static const int cp850_ucs_table_len = (sizeof (cp850_ucs_table) / sizeof (unsigned short)); +static const int cp850_ucs_table_max = 0x80 + (sizeof (cp850_ucs_table) / sizeof (unsigned short)); + +#endif /* UNICODE_TABLE_CP850_H */ diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h index afb5680d68..b6c0bb2d87 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h @@ -79,6 +79,7 @@ #define MBFL_WCSPLANE_ARMSCII8 0x70fb0000 #define MBFL_WCSPLANE_KOI8U 0x70fc0000 #define MBFL_WCSPLANE_CP1254 0x70fd0000 /* 00h - FFh */ +#define MBFL_WCSPLANE_CP850 0x70fe0000 /* 00h - FFh */ #define MBFL_WCSGROUP_MASK 0xffffff #define MBFL_WCSGROUP_UCS4MAX 0x70000000 #define MBFL_WCSGROUP_WCHARMAX 0x78000000 diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index 63003d0f0a..725a674b39 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -93,6 +93,7 @@ #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter); @@ -223,6 +224,8 @@ const struct mbfl_convert_vtbl *mbfl_convert_filter_list[] = { &vtbl_wchar_byte2le, &vtbl_armscii8_wchar, &vtbl_wchar_armscii8, + &vtbl_cp850_wchar, + &vtbl_wchar_cp850, &vtbl_pass, NULL }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c index 51cea89a90..76956f0530 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.c @@ -100,6 +100,7 @@ #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" #ifndef HAVE_STRCASECMP #ifdef HAVE_STRICMP @@ -186,6 +187,7 @@ static const mbfl_encoding *mbfl_encoding_ptr_list[] = { &mbfl_encoding_koi8r, &mbfl_encoding_koi8u, &mbfl_encoding_armscii8, + &mbfl_encoding_cp850, NULL }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h index b3b1750463..2599e1107e 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h +++ b/ext/mbstring/libmbfl/mbfl/mbfl_encoding.h @@ -103,6 +103,7 @@ enum mbfl_no_encoding { mbfl_no_encoding_koi8u, mbfl_no_encoding_8859_16, mbfl_no_encoding_armscii8, + mbfl_no_encoding_cp850, mbfl_no_encoding_charset_max }; diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c index bcfd4b2acc..4f3bd5c58d 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_ident.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_ident.c @@ -92,6 +92,7 @@ #include "filters/mbfilter_ucs2.h" #include "filters/mbfilter_htmlent.h" #include "filters/mbfilter_armscii8.h" +#include "filters/mbfilter_cp850.h" static const struct mbfl_identify_vtbl vtbl_identify_false = { mbfl_no_encoding_pass, @@ -140,6 +141,7 @@ static const struct mbfl_identify_vtbl *mbfl_identify_filter_list[] = { &vtbl_identify_8859_14, &vtbl_identify_8859_15, &vtbl_identify_armscii8, + &vtbl_identify_cp850, &vtbl_identify_false, NULL }; diff --git a/ext/mbstring/mbstring.dsp b/ext/mbstring/mbstring.dsp index 629b0bd4d9..2e050c6d20 100644 --- a/ext/mbstring/mbstring.dsp +++ b/ext/mbstring/mbstring.dsp @@ -374,6 +374,10 @@ SOURCE=.\libmbfl\filters\mbfilter_armscii8.c # End Source File # Begin Source File +SOURCE=.\libmbfl\filters\mbfilter_cp850.c +# End Source File +# Begin Source File + SOURCE=.\libmbfl\mbfl\mbfilter_pass.c # End Source File # Begin Source File @@ -687,6 +691,10 @@ SOURCE=.\libmbfl\filters\mbfilter_armscii8.h # End Source File # Begin Source File +SOURCE=.\libmbfl\filters\mbfilter_cp850.h +# End Source File +# Begin Source File + SOURCE=.\libmbfl\mbfl\mbfilter_pass.h # End Source File # Begin Source File @@ -911,6 +919,10 @@ SOURCE=.\libmbfl\filters\unicode_table_armscii8.h # End Source File # Begin Source File +SOURCE=.\libmbfl\filters\unicode_table_cp850.h +# End Source File +# Begin Source File + SOURCE=.\libmbfl\filters\unicode_table_uhc.h # End Source File # End Group -- 2.40.0