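- The identify filter was completely wrong: it flagged every byte outside
  0x80-0xFF (i.e. all ASCII) as "not CP1251" and reset the flag on any high
  byte, when in fact the only byte CP1251 does not use is 0x98.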
- Invalid CP1251 byte 0x98 was converted to Unicode 0xFFFD (generic
replacement character), rather than respecting `mb_substitute_character`.
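- Unicode 0xFFFD was converted to CP1251 byte 0x98 (which is itself invalid),
  because the reverse table lookup matched the 0xFFFD placeholder stored in
  that slot.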
- When converting CP1251 to CP1251, don't pass invalid bytes through silently.
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
-/*
- * cp1251 => wchar
- */
-int
-mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter)
{
int s;
- if (c >= 0 && c < cp1251_ucs_table_min) {
+ if (c < cp1251_ucs_table_min) {
s = c;
- } else if (c >= cp1251_ucs_table_min && c < 0x100) {
+ } else {
s = cp1251_ucs_table[c - cp1251_ucs_table_min];
- if (s <= 0) {
- s = c;
- s &= MBFL_WCSPLANE_MASK;
- s |= MBFL_WCSPLANE_CP1251;
+ if (!s) {
+ s = c | MBFL_WCSGROUP_THROUGH;
}
- } else {
- s = c;
- s &= MBFL_WCSGROUP_MASK;
- s |= MBFL_WCSGROUP_THROUGH;
}
CK((*filter->output_function)(s, filter->data));
-
return c;
}
-/*
- * wchar => cp1251
- */
-int
-mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter)
{
- int s, n;
-
if (c < 0x80) {
- s = c;
+ CK((*filter->output_function)(c, filter->data));
} else {
- s = -1;
- n = cp1251_ucs_table_len-1;
- while (n >= 0) {
+ for (int n = 0; n < cp1251_ucs_table_len; n++) {
if (c == cp1251_ucs_table[n]) {
- s = cp1251_ucs_table_min + n;
- break;
+ CK((*filter->output_function)(cp1251_ucs_table_min + n, filter->data));
+ return c;
}
- n--;
}
- if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1251) {
- s = c & MBFL_WCSPLANE_MASK;
- }
- }
-
- if (s >= 0) {
- CK((*filter->output_function)(s, filter->data));
- } else {
CK(mbfl_filt_conv_illegal_output(c, filter));
}
return c;
}
-/* all of this is so ugly now! */
static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter)
{
- if (c >= 0x80 && c <= 0xff)
- filter->flag = 0;
- else
- filter->flag = 1; /* not it */
+ /* Only one byte in this single-byte encoding is not used */
+ if (c == 0x98) {
+ filter->flag = 1;
+ }
return c;
}
0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021,
0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f,
0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
- 0xfffd, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
+ 0x0000, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7,
0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407,
0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7,
#define MBFL_WCSPLANE_BIG5 0x70f40000 /* 2121h - 9898h */
#define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */
#define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */
-#define MBFL_WCSPLANE_CP1251 0x70f70000
#define MBFL_WCSPLANE_CP866 0x70f80000
#define MBFL_WCSPLANE_KOI8R 0x70f90000
#define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */