libmbfl/filters/mbfilter_jis.c
libmbfl/filters/mbfilter_koi8r.c
libmbfl/filters/mbfilter_qprint.c
+ libmbfl/filters/mbfilter_armscii8.c
libmbfl/filters/mbfilter_sjis.c
libmbfl/filters/mbfilter_ucs2.c
libmbfl/filters/mbfilter_ucs4.c
libmbfl/nls/nls_ru.c
libmbfl/nls/nls_uni.c
libmbfl/nls/nls_zh.c
+ libmbfl/nls/nls_hy.c
+ libmbfl/nls/nls_tr.c
])
PHP_MBSTRING_ADD_CFLAG([-DHAVE_CONFIG_H])
else
noinst_LTLIBRARIES=libmbfl_filters.la
INCLUDES=-I../mbfl
libmbfl_filters_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_uhc.h
+libmbfl_filters_la_SOURCES=mbfilter_cp936.c mbfilter_hz.c mbfilter_euc_tw.c mbfilter_big5.c mbfilter_euc_jp.c mbfilter_jis.c mbfilter_iso8859_1.c mbfilter_iso8859_2.c mbfilter_cp1252.c mbfilter_cp1251.c mbfilter_ascii.c mbfilter_iso8859_3.c mbfilter_iso8859_4.c mbfilter_iso8859_5.c mbfilter_iso8859_6.c mbfilter_iso8859_7.c mbfilter_iso8859_8.c mbfilter_iso8859_9.c mbfilter_iso8859_10.c mbfilter_iso8859_13.c mbfilter_iso8859_14.c mbfilter_iso8859_15.c mbfilter_iso8859_16.c mbfilter_htmlent.c mbfilter_byte2.c mbfilter_byte4.c mbfilter_uuencode.c mbfilter_base64.c mbfilter_sjis.c mbfilter_7bit.c mbfilter_qprint.c mbfilter_ucs4.c mbfilter_ucs2.c mbfilter_utf32.c mbfilter_utf16.c mbfilter_utf8.c mbfilter_utf7.c mbfilter_utf7imap.c mbfilter_euc_jp_win.c mbfilter_cp932.c mbfilter_euc_cn.c mbfilter_euc_kr.c mbfilter_uhc.c mbfilter_iso2022_kr.c mbfilter_cp866.c mbfilter_koi8r.c mbfilter_armscii8.c html_entities.c cp932_table.h html_entities.h mbfilter_7bit.h mbfilter_ascii.h mbfilter_base64.h mbfilter_big5.h mbfilter_byte2.h mbfilter_byte4.h mbfilter_cp1251.h mbfilter_cp1252.h mbfilter_cp866.h mbfilter_cp932.h mbfilter_cp936.h mbfilter_euc_cn.h mbfilter_euc_jp.h mbfilter_euc_jp_win.h mbfilter_euc_kr.h mbfilter_euc_tw.h mbfilter_htmlent.h mbfilter_hz.h mbfilter_iso2022_kr.h mbfilter_iso8859_1.h mbfilter_iso8859_10.h mbfilter_iso8859_13.h mbfilter_iso8859_14.h mbfilter_iso8859_15.h mbfilter_iso8859_16.h mbfilter_iso8859_2.h mbfilter_iso8859_3.h mbfilter_iso8859_4.h mbfilter_iso8859_5.h mbfilter_iso8859_6.h mbfilter_iso8859_7.h mbfilter_iso8859_8.h mbfilter_iso8859_9.h mbfilter_jis.h mbfilter_koi8r.h mbfilter_armscii8.h mbfilter_qprint.h mbfilter_sjis.h mbfilter_ucs2.h mbfilter_ucs4.h mbfilter_uhc.h mbfilter_utf16.h mbfilter_utf32.h mbfilter_utf7.h mbfilter_utf7imap.h mbfilter_utf8.h mbfilter_uuencode.h unicode_prop.h unicode_table_big5.h unicode_table_cns11643.h unicode_table_cp1251.h unicode_table_cp1252.h unicode_table_cp866.h unicode_table_cp932_ext.h unicode_table_cp936.h unicode_table_iso8859_10.h unicode_table_iso8859_13.h unicode_table_iso8859_14.h unicode_table_iso8859_15.h unicode_table_iso8859_16.h unicode_table_iso8859_2.h unicode_table_iso8859_3.h unicode_table_iso8859_4.h unicode_table_iso8859_5.h unicode_table_iso8859_6.h unicode_table_iso8859_7.h unicode_table_iso8859_8.h unicode_table_iso8859_9.h unicode_table_jis.h unicode_table_koi8r.h unicode_table_armscii8.h unicode_table_uhc.h
mbfilter_iso8859_2.c: unicode_table_iso8859_2.h
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I../mbfl
-OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj
+OBJS=mbfilter_cp936.obj mbfilter_hz.obj mbfilter_euc_tw.obj mbfilter_big5.obj mbfilter_euc_jp.obj mbfilter_jis.obj mbfilter_iso8859_1.obj mbfilter_iso8859_2.obj mbfilter_cp1252.obj mbfilter_cp1251.obj mbfilter_ascii.obj mbfilter_iso8859_3.obj mbfilter_iso8859_4.obj mbfilter_iso8859_5.obj mbfilter_iso8859_6.obj mbfilter_iso8859_7.obj mbfilter_iso8859_8.obj mbfilter_iso8859_9.obj mbfilter_iso8859_10.obj mbfilter_iso8859_13.obj mbfilter_iso8859_14.obj mbfilter_iso8859_15.obj mbfilter_iso8859_16.obj mbfilter_htmlent.obj mbfilter_byte2.obj mbfilter_byte4.obj mbfilter_uuencode.obj mbfilter_base64.obj mbfilter_sjis.obj mbfilter_7bit.obj mbfilter_qprint.obj mbfilter_ucs4.obj mbfilter_ucs2.obj mbfilter_utf32.obj mbfilter_utf16.obj mbfilter_utf8.obj mbfilter_utf7.obj mbfilter_utf7imap.obj mbfilter_euc_jp_win.obj mbfilter_cp932.obj mbfilter_euc_cn.obj mbfilter_euc_kr.obj mbfilter_uhc.obj mbfilter_iso2022_kr.obj mbfilter_cp866.obj mbfilter_koi8r.obj html_entities.obj mbfilter_armscii8.obj
all: $(OBJS)
--- /dev/null
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file: Hayk Chamyan <hamshen@gmail.com>
+ *
+ */
+
+/*
+ * "armenian code filter and converter"
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mbfilter.h"
+#include "mbfilter_armscii8.h"
+#include "unicode_table_armscii8.h"
+
+static int mbfl_filt_ident_armscii8(int c, mbfl_identify_filter *filter);
+
+static const char *mbfl_encoding_armscii8_aliases[] = {"ArmSCII-8", "ArmSCII8", "ARMSCII-8", "ARMSCII8", NULL};
+
+const mbfl_encoding mbfl_encoding_armscii8 = {
+ mbfl_no_encoding_armscii8,
+ "ArmSCII-8",
+ "ArmSCII-8",
+ (const char *(*)[])&mbfl_encoding_armscii8_aliases,
+ NULL,
+ MBFL_ENCTYPE_SBCS
+};
+
+const struct mbfl_identify_vtbl vtbl_identify_armscii8 = {
+ mbfl_no_encoding_armscii8,
+ mbfl_filt_ident_common_ctor,
+ mbfl_filt_ident_common_dtor,
+ mbfl_filt_ident_armscii8
+};
+
+const struct mbfl_convert_vtbl vtbl_wchar_armscii8 = {
+ mbfl_no_encoding_wchar,
+ mbfl_no_encoding_armscii8,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_wchar_armscii8,
+ mbfl_filt_conv_common_flush
+};
+
+const struct mbfl_convert_vtbl vtbl_armscii8_wchar = {
+ mbfl_no_encoding_armscii8,
+ mbfl_no_encoding_wchar,
+ mbfl_filt_conv_common_ctor,
+ mbfl_filt_conv_common_dtor,
+ mbfl_filt_conv_armscii8_wchar,
+ mbfl_filt_conv_common_flush
+};
+
+#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
+
+/*
+ * armscii8 => wchar
+ */
+int mbfl_filt_conv_armscii8_wchar(int c, mbfl_convert_filter *filter)
+{
+ int s;
+
+ if (c >= 0 && c < armscii8_ucs_table_min) {
+ s = c;
+ } else if (c >= armscii8_ucs_table_min && c < 0x100) {
+ s = armscii8_ucs_table[c - armscii8_ucs_table_min];
+ if (s <= 0) {
+ s = c;
+ s &= MBFL_WCSPLANE_MASK;
+ s |= MBFL_WCSPLANE_ARMSCII8;
+ }
+ } else {
+ s = c;
+ s &= MBFL_WCSGROUP_MASK;
+ s |= MBFL_WCSGROUP_THROUGH;
+ }
+
+ CK((*filter->output_function)(s, filter->data));
+
+ return c;
+}
+
+/*
+ * wchar => armscii8
+ */
+int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter)
+{
+
+ int s, n;
+
+ if (c >= 0x28 && c < 0x30) {
+ s = ucs_armscii8_table[c-0x28];
+ } else if (c < armscii8_ucs_table_min) {
+ s = c;
+ } else {
+ s = -1;
+ n = armscii8_ucs_table_len-1;
+ while (n >= 0) {
+ if (c == armscii8_ucs_table[n]) {
+ s = armscii8_ucs_table_min + n;
+ break;
+ }
+ n--;
+ }
+ if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_ARMSCII8) {
+ s = c & MBFL_WCSPLANE_MASK;
+ }
+ }
+
+ if (s >= 0) {
+ CK((*filter->output_function)(s, filter->data));
+ } else {
+ if (filter->illegal_mode != MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
+ CK(mbfl_filt_conv_illegal_output(c, filter));
+ }
+ }
+
+ return c;
+}
+
+static int mbfl_filt_ident_armscii8(int c, mbfl_identify_filter *filter)
+{
+ if (c >= armscii8_ucs_table_min && c <= 0xff)
+ filter->flag = 0;
+ else
+ filter->flag = 1; /* not it */
+ return c;
+}
--- /dev/null
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file: Hayk Chamyan <hamshen@gmail.com>
+ *
+ */
+
+#ifndef MBFL_MBFILTER_ARMSCII8_H
+#define MBFL_MBFILTER_ARMSCII8_H
+
+#include "mbfilter.h"
+
+extern const mbfl_encoding mbfl_encoding_armscii8;
+extern const struct mbfl_identify_vtbl vtbl_identify_armscii8;
+extern const struct mbfl_convert_vtbl vtbl_wchar_armscii8;
+extern const struct mbfl_convert_vtbl vtbl_armscii8_wchar;
+
+int mbfl_filt_conv_armscii8_wchar(int c, mbfl_convert_filter *filter);
+int mbfl_filt_conv_wchar_armscii8(int c, mbfl_convert_filter *filter);
+
+#endif /* MBFL_MBFILTER_ARMSCII8_H */
+
+/*
+ * Local variables:
+ * tab-width: 4
+ * c-basic-offset: 4
+ * End:
+ */
--- /dev/null
+/*
+ * "streamable kanji code filter and converter"
+ * Copyright (c) 1998-2002 HappySize, Inc. All rights reserved.
+ *
+ * LICENSE NOTICES
+ *
+ * This file is part of "streamable kanji code filter and converter",
+ * which is distributed under the terms of GNU Lesser General Public
+ * License (version 2) as published by the Free Software Foundation.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with "streamable kanji code filter and converter";
+ * if not, write to the Free Software Foundation, Inc., 59 Temple Place,
+ * Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The author of this file: Hayk Chamyan <hamshen@gmail.com>
+ *
+ */
+
+#ifndef UNICODE_TABLE_ARMSCII8_H
+#define UNICODE_TABLE_ARMSCII8_H
+
+/* ArmSCII-8 to Unicode table */
+static const unsigned short armscii8_ucs_table[] = {
+/*0x00a0, 0x00a1, 0x0587, 0x0589, 0x0029, 0x0028, 0x00bb, 0x00ab,*/
+0x00a0, 0xfffd, 0x0587, 0x0589, 0x0029, 0x0028, 0x00bb, 0x00ab,
+0x2014, 0x002e, 0x055d, 0x002c, 0x002d, 0x058a, 0x2026, 0x055c,
+0x055b, 0x055e, 0x0531, 0x0561, 0x0532, 0x0562, 0x0533, 0x0563,
+0x0534, 0x0564, 0x0535, 0x0565, 0x0536, 0x0566, 0x0537, 0x0567,
+0x0538, 0x0568, 0x0539, 0x0569, 0x053a, 0x056a, 0x053b, 0x056b,
+0x053c, 0x056c, 0x053d, 0x056d, 0x053e, 0x056e, 0x053f, 0x056f,
+0x0540, 0x0570, 0x0541, 0x0571, 0x0542, 0x0572, 0x0543, 0x0573,
+0x0544, 0x0574, 0x0545, 0x0575, 0x0546, 0x0576, 0x0547, 0x0577,
+0x0548, 0x0578, 0x0549, 0x0579, 0x054a, 0x057a, 0x054b, 0x057b,
+0x054c, 0x057c, 0x054d, 0x057d, 0x054e, 0x057e, 0x054f, 0x057f,
+0x0550, 0x0580, 0x0551, 0x0581, 0x0552, 0x0582, 0x0553, 0x0583,
+/*0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055a, 0x00ff*/
+0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055a, 0xfffd
+};
+
+static const unsigned char ucs_armscii8_table[] = {
+0xa5, 0xa4, 0x2a, 0x2b, 0xab, 0xac, 0xa9, 0x2f
+};
+
+static const int armscii8_ucs_table_min = 0xa0;
+static const int armscii8_ucs_table_len = (sizeof (armscii8_ucs_table) / sizeof (unsigned short));
+static const int armscii8_ucs_table_max = 0xa0 + (sizeof (armscii8_ucs_table) / sizeof (unsigned short));
+
+#endif /* UNICODE_TABLE_ARMSCII8_H */
# End Source File\r
# Begin Source File\r
\r
+SOURCE=.\filters\mbfilter_armscii8.c\r
+# End Source File\r
+# Begin Source File\r
+\r
SOURCE=.\mbfl\mbfilter_pass.c\r
# End Source File\r
# Begin Source File\r
# End Source File\r
# Begin Source File\r
\r
+SOURCE=.\nls\nls_hy.c\r
+# End Source File\r
+# Begin Source File\r
+\r
SOURCE=.\nls\nls_kr.c\r
# End Source File\r
# Begin Source File\r
# End Source File\r
# Begin Source File\r
\r
+SOURCE=.\filters\mbfilter_armscii8.h\r
+# End Source File\r
+# Begin Source File\r
+\r
SOURCE=.\mbfl\mbfilter_pass.h\r
# End Source File\r
# Begin Source File\r
# End Source File\r
# Begin Source File\r
\r
+SOURCE=.\nls\nls_hy.h\r
+# End Source File\r
+# Begin Source File\r
+\r
SOURCE=.\nls\nls_kr.h\r
# End Source File\r
# Begin Source File\r
# End Source File\r
# Begin Source File\r
\r
+SOURCE=.\filters\unicode_table_armscii8.h\r
+# End Source File\r
+# Begin Source File\r
+\r
SOURCE=.\filters\unicode_table_uhc.h\r
# End Source File\r
# End Group\r
* encoding detector
*/
mbfl_encoding_detector *
-mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz)
+mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict)
{
mbfl_encoding_detector *identd;
}
identd->filter_list_size = num;
+ /* set strict flag */
+ identd->strict = strict;
+
return identd;
}
while (n >= 0) {
filter = identd->filter_list[n];
if (!filter->flag) {
+ if (identd->strict && filter->status) {
+ continue;
+ }
encoding = filter->encoding->no_encoding;
}
n--;
}
+
+ if (encoding == mbfl_no_encoding_invalid) {
+ n = identd->filter_list_size - 1;
+ while (n >= 0) {
+ filter = identd->filter_list[n];
+ if (!filter->flag) {
+ encoding = filter->encoding->no_encoding;
+ }
+ n--;
+ }
+ }
}
return encoding;
for (i = 0; i < num; i++) {
filter = &flist[i];
if (!filter->flag) {
+ if (strict && filter->status) {
+ continue;
+ }
encoding = filter->encoding;
break;
}
}
+ /* fall-back judge */
+ if (!encoding) {
+ for (i = 0; i < num; i++) {
+ filter = &flist[i];
+ if (!filter->flag) {
+ encoding = filter->encoding;
+ break;
+ }
+ }
+ }
+
/* cleanup */
/* dtors should be called in reverse order */
i = num; while (--i >= 0) {
}
enum mbfl_no_encoding
-mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz)
+mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict)
{
const mbfl_encoding *encoding;
- encoding = mbfl_identify_encoding(string, elist, elistsz, 0);
+ encoding = mbfl_identify_encoding(string, elist, elistsz, strict);
if (encoding != NULL &&
encoding->no_encoding > mbfl_no_encoding_charset_min &&
encoding->no_encoding < mbfl_no_encoding_charset_max) {
struct _mbfl_encoding_detector {
mbfl_identify_filter **filter_list;
int filter_list_size;
+ int strict;
};
-MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz);
+MBFLAPI extern mbfl_encoding_detector * mbfl_encoding_detector_new(enum mbfl_no_encoding *elist, int elistsz, int strict);
MBFLAPI extern void mbfl_encoding_detector_delete(mbfl_encoding_detector *identd);
MBFLAPI extern int mbfl_encoding_detector_feed(mbfl_encoding_detector *identd, mbfl_string *string);
MBFLAPI extern enum mbfl_no_encoding mbfl_encoding_detector_judge(mbfl_encoding_detector *identd);
mbfl_identify_encoding_name(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
MBFLAPI extern enum mbfl_no_encoding
-mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz);
+mbfl_identify_encoding_no(mbfl_string *string, enum mbfl_no_encoding *elist, int elistsz, int strict);
/*
* strlen
#define MBFL_WCSPLANE_CP866 0x70f80000
#define MBFL_WCSPLANE_KOI8R 0x70f90000
#define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */
+#define MBFL_WCSPLANE_ARMSCII8 0x70fb0000
#define MBFL_WCSGROUP_MASK 0xffffff
#define MBFL_WCSGROUP_UCS4MAX 0x70000000
#define MBFL_WCSGROUP_WCHARMAX 0x78000000
#include "filters/mbfilter_ucs4.h"
#include "filters/mbfilter_ucs2.h"
#include "filters/mbfilter_htmlent.h"
+#include "filters/mbfilter_armscii8.h"
static void mbfl_convert_filter_reset_vtbl(mbfl_convert_filter *filter);
&vtbl_wchar_byte2be,
&vtbl_byte2le_wchar,
&vtbl_wchar_byte2le,
+ &vtbl_armscii8_wchar,
+ &vtbl_wchar_armscii8,
&vtbl_pass,
NULL
};
#include "filters/mbfilter_ucs4.h"
#include "filters/mbfilter_ucs2.h"
#include "filters/mbfilter_htmlent.h"
+#include "filters/mbfilter_armscii8.h"
#ifndef HAVE_STRCASECMP
#ifdef HAVE_STRICMP
&mbfl_encoding_cp1251,
&mbfl_encoding_cp866,
&mbfl_encoding_koi8r,
+ &mbfl_encoding_armscii8,
NULL
};
mbfl_no_encoding_cp866,
mbfl_no_encoding_koi8r,
mbfl_no_encoding_8859_16,
+ mbfl_no_encoding_armscii8,
mbfl_no_encoding_charset_max
};
#include "filters/mbfilter_ucs4.h"
#include "filters/mbfilter_ucs2.h"
#include "filters/mbfilter_htmlent.h"
+#include "filters/mbfilter_armscii8.h"
static const struct mbfl_identify_vtbl vtbl_identify_false = {
mbfl_no_encoding_pass,
&vtbl_identify_8859_13,
&vtbl_identify_8859_14,
&vtbl_identify_8859_15,
+ &vtbl_identify_armscii8,
&vtbl_identify_false,
NULL
};
#include "nls/nls_de.h"
#include "nls/nls_ru.h"
#include "nls/nls_en.h"
+#include "nls/nls_hy.h"
+#include "nls/nls_tr.h"
#include "nls/nls_neutral.h"
#ifndef HAVE_STRCASECMP
&mbfl_language_english,
&mbfl_language_german,
&mbfl_language_russian,
+ &mbfl_language_armenian,
+ &mbfl_language_turkish,
&mbfl_language_neutral,
NULL
};
mbfl_no_language_simplified_chinese, /* zh-cn */
mbfl_no_language_traditional_chinese, /* zh-tw */
mbfl_no_language_russian, /* ru */
+ mbfl_no_language_armenian, /* hy */
+ mbfl_no_language_turkish, /* tr */
mbfl_no_language_max
};
# Description: a script to generate east asian width table.
#
-function conv(str) {
- if (!match(str, "^0[xX]")) {
- return 0 + str
- }
-
- retval = 0
-
- for (i = 3; i <= length(str); i++) {
- n = index("0123456789abcdefABCDEF", substr(str, i, 1)) - 1
-
- if (n < 0) {
- return 0 + str;
- } else if (n >= 16) {
- n -= 6;
- }
-
- retval = retval * 16 + n
- }
-
- return retval
-}
-
BEGIN {
prev = -1
comma = 0
/^[0-9a-fA-F]+;/ {
if ($2 == "W" || $2 == "F") {
- v = conv("0x" $1)
+ v = ( "0x" $1 ) + 0
if (prev < 0) {
first = v
} else if (v - prev > 1) {
/^[0-9a-fA-F]+\.\./ {
if ($4 == "W" || $4 == "F") {
- vs = conv("0x" $1)
- ve = conv("0x" $3)
+ vs = ( "0x" $1 ) + 0
+ ve = ( "0x" $3 ) + 0
if (prev < 0) {
first = vs
} else if (vs - prev > 1) {
noinst_LTLIBRARIES=libmbfl_nls.la
INCLUDES=-I../mbfl
libmbfl_nls_la_LDFLAGS=-version-info $(SHLIB_VERSION)
-libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h
+libmbfl_nls_la_SOURCES=nls_ja.c nls_de.c nls_en.c nls_hy.c nls_kr.c nls_ru.c nls_zh.c nls_uni.c nls_neutral.c nls_ja.h nls_de.h nls_en.h nls_hy.h nls_kr.h nls_ru.h nls_zh.h nls_uni.h nls_neutral.h
!include ..\rules.mak.bcc32
INCLUDES=$(INCLUDES) -I..\mbfl
-OBJS=nls_ja.obj nls_de.obj nls_en.obj nls_kr.obj nls_ru.obj nls_zh.obj nls_uni.obj nls_neutral.obj
+OBJS=nls_ja.obj nls_de.obj nls_en.obj nls_hy.obj nls_kr.obj nls_ru.obj nls_zh.obj nls_uni.obj nls_neutral.obj
all: $(OBJS)
--- /dev/null
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#include "mbfilter.h"
+#include "nls_hy.h"
+
+const mbfl_language mbfl_language_armenian = {
+ mbfl_no_language_armenian ,
+ "Armenian ",
+ "hy",
+ NULL,
+ mbfl_no_encoding_armscii8,
+ mbfl_no_encoding_qprint,
+ mbfl_no_encoding_8bit
+};
--- /dev/null
+#ifndef MBFL_NLS_HY_H
+#define MBFL_NLS_HY_H
+
+#include "mbfilter.h"
+#include "nls_hy.h"
+
+extern const mbfl_language mbfl_language_armenian;
+
+#endif /* MBFL_NLS_HY_H */
--- /dev/null
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_STDDEF_H
+#include <stddef.h>
+#endif
+
+#include "mbfilter.h"
+#include "nls_tr.h"
+
+const mbfl_language mbfl_language_turkish = {
+ mbfl_no_language_turkish,
+ "Turkish",
+ "tr",
+ NULL,
+ mbfl_no_encoding_8859_9,
+ mbfl_no_encoding_qprint,
+ mbfl_no_encoding_8bit
+};
+
--- /dev/null
+#ifndef MBFL_NLS_TR_H
+#define MBFL_NLS_TR_H
+
+#include "mbfilter.h"
+
+extern const mbfl_language mbfl_language_turkish;
+
+#endif /* MBFL_NLS_TR_H */
from_encoding = mbfl_no_encoding_invalid;
identd = mbfl_encoding_detector_new(
(enum mbfl_no_encoding *)info->from_encodings,
- info->num_from_encodings);
+ info->num_from_encodings, 0);
if (identd) {
n = 0;
while (n < num) {
} else {
/* auto detect */
from_encoding = mbfl_no_encoding_invalid;
- identd = mbfl_encoding_detector_new(elist, elistsz);
+ identd = mbfl_encoding_detector_new(elist, elistsz, 0);
if (identd != NULL) {
n = 0;
while (n < num) {
}
}
+ if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
+ && (from >= mbfl_strlen(&string))) {
+ RETURN_FALSE;
+ }
+
ret = mbfl_substr(&string, &result, from, len);
if (ret != NULL) {
RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
string.no_encoding = from_encoding;
} else if (size > 1) {
/* auto detect */
- from_encoding = mbfl_identify_encoding_no(&string, list, size);
+ from_encoding = mbfl_identify_encoding_no(&string, list, size, 0);
if (from_encoding != mbfl_no_encoding_invalid) {
string.no_encoding = from_encoding;
} else {
stack = (pval ***)safe_emalloc(stack_max, sizeof(pval **), 0);
if (stack != NULL) {
stack_level = 0;
- identd = mbfl_encoding_detector_new(elist, elistsz);
+ identd = mbfl_encoding_detector_new(elist, elistsz, 0);
if (identd != NULL) {
n = 2;
while (n < argc || stack_level > 0) {
orig_str.no_encoding = MBSTRG(current_internal_encoding);
if (orig_str.no_encoding == mbfl_no_encoding_invalid
|| orig_str.no_encoding == mbfl_no_encoding_pass) {
- orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
+ orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), 0);
}
pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
if (pstr != NULL) {
if (orig_str.no_encoding == mbfl_no_encoding_invalid
|| orig_str.no_encoding == mbfl_no_encoding_pass) {
- orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size));
+ orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), 0);
}
pstr = NULL;
mbfl_string_init(&string);
string.no_language = MBSTRG(current_language);
- identd = mbfl_encoding_detector_new(elist, size);
+ identd = mbfl_encoding_detector_new(elist, size, 0);
if (identd) {
int n = 0;