From 23270d7f9eb337ad640512aca79530a7c1f96a59 Mon Sep 17 00:00:00 2001 From: Alex Dowad Date: Sat, 19 Sep 2020 20:27:55 +0200 Subject: [PATCH] Add identify filter for ISO-8859-3 (Latin-3) There are some bytes in this encoding which are not mapped to any character. Notably, Microsoft added their own mappings for these 'unused' bytes in their version of Latin-3, called CP28593. --- ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c index 3b1c0847ea..0c061055f8 100644 --- a/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c +++ b/ext/mbstring/libmbfl/filters/mbfilter_iso8859_3.c @@ -31,6 +31,8 @@ #include "mbfilter_iso8859_3.h" #include "unicode_table_iso8859_3.h" +static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter); + static const char *mbfl_encoding_8859_3_aliases[] = {"ISO8859-3", "latin3", NULL}; const mbfl_encoding mbfl_encoding_8859_3 = { @@ -47,7 +49,7 @@ const mbfl_encoding mbfl_encoding_8859_3 = { const struct mbfl_identify_vtbl vtbl_identify_8859_3 = { mbfl_no_encoding_8859_3, mbfl_filt_ident_common_ctor, - mbfl_filt_ident_true + mbfl_filt_ident_iso8859_3 }; const struct mbfl_convert_vtbl vtbl_8859_3_wchar = { @@ -132,3 +134,11 @@ int mbfl_filt_conv_wchar_8859_3(int c, mbfl_convert_filter *filter) return c; } + +static int mbfl_filt_ident_iso8859_3(int c, mbfl_identify_filter *filter) +{ + if (c >= 0xA0 && !iso8859_3_ucs_table[c - 0xA0]) { + filter->status = 1; + } + return c; +} -- 2.40.0