Note that some text encoding conversion libraries, such as Solaris iconv
and FreeBSD iconv, map 0x30-0x39 to the Arabic script numerals rather than
the 'regular' Roman numerals. (That is, to Unicode codepoints 0x660-0x669.)
Further, Windows CP28596 adds more mappings to use the unused bytes in
ISO-8859-6.
#include "mbfilter_iso8859_6.h"
#include "unicode_table_iso8859_6.h"
+static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter);
+
static const char *mbfl_encoding_8859_6_aliases[] = {"ISO8859-6", "arabic", NULL};
const mbfl_encoding mbfl_encoding_8859_6 = {
const struct mbfl_identify_vtbl vtbl_identify_8859_6 = {
mbfl_no_encoding_8859_6,
mbfl_filt_ident_common_ctor,
- mbfl_filt_ident_true
+ mbfl_filt_ident_iso8859_6
};
const struct mbfl_convert_vtbl vtbl_8859_6_wchar = {
return c;
}
+
+static int mbfl_filt_ident_iso8859_6(int c, mbfl_identify_filter *filter)
+{
+ if (c >= 0xA0 && !iso8859_6_ucs_table[c - 0xA0]) {
+ filter->status = 1;
+ }
+ return c;
+}