From: Alex Dowad
Date: Sun, 18 Oct 2020 13:12:11 +0000 (+0200)
Subject: Fix mbstring support for CP1251 encoding
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=44bd5804b00b6d37da266c0401fde3ac3476c6d9;p=php

Fix mbstring support for CP1251 encoding

- Identify filter was as wrong as wrong can be.
- Invalid CP1251 byte 0x98 was converted to Unicode 0xFFFD (generic replacement character), rather than respecting `mb_substitute_character`.
- Unicode 0xFFFD was converted to some random CP1251 byte.
- When converting CP1251 to CP1251, don't pass invalid bytes through silently.
---

diff --git a/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c b/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c
index af05b5ec6d..cf5107b0d6 100644
--- a/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c
+++ b/ext/mbstring/libmbfl/filters/mbfilter_cp1251.c
@@ -74,74 +74,45 @@ const struct mbfl_convert_vtbl vtbl_cp1251_wchar = {
 #define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
-/*
- * cp1251 => wchar
- */
-int
-mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_cp1251_wchar(int c, mbfl_convert_filter *filter)
 {
 	int s;
-	if (c >= 0 && c < cp1251_ucs_table_min) {
+	if (c < cp1251_ucs_table_min) {
 		s = c;
-	} else if (c >= cp1251_ucs_table_min && c < 0x100) {
+	} else {
 		s = cp1251_ucs_table[c - cp1251_ucs_table_min];
-		if (s <= 0) {
-			s = c;
-			s &= MBFL_WCSPLANE_MASK;
-			s |= MBFL_WCSPLANE_CP1251;
+		if (!s) {
+			s = c | MBFL_WCSGROUP_THROUGH;
 		}
-	} else {
-		s = c;
-		s &= MBFL_WCSGROUP_MASK;
-		s |= MBFL_WCSGROUP_THROUGH;
 	}
 	CK((*filter->output_function)(s, filter->data));
-
 	return c;
 }
-/*
- * wchar => cp1251
- */
-int
-mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter)
+int mbfl_filt_conv_wchar_cp1251(int c, mbfl_convert_filter *filter)
 {
-	int s, n;
-
 	if (c < 0x80) {
-		s = c;
+		CK((*filter->output_function)(c, filter->data));
 	} else {
-		s = -1;
-		n = cp1251_ucs_table_len-1;
-		while (n >= 0) {
+		for (int n = 0; n < cp1251_ucs_table_len; n++) {
 			if (c == cp1251_ucs_table[n]) {
-				s = cp1251_ucs_table_min + n;
-				break;
+				CK((*filter->output_function)(cp1251_ucs_table_min + n, filter->data));
+				return c;
 			}
-			n--;
 		}
-		if (s <= 0 && (c & ~MBFL_WCSPLANE_MASK) == MBFL_WCSPLANE_CP1251) {
-			s = c & MBFL_WCSPLANE_MASK;
-		}
-	}
-
-	if (s >= 0) {
-		CK((*filter->output_function)(s, filter->data));
-	} else {
 		CK(mbfl_filt_conv_illegal_output(c, filter));
 	}
 	return c;
 }
-/* all of this is so ugly now! */
 static int mbfl_filt_ident_cp1251(int c, mbfl_identify_filter *filter)
 {
-	if (c >= 0x80 && c <= 0xff)
-		filter->flag = 0;
-	else
-		filter->flag = 1; /* not it */
+	/* Only one byte in this single-byte encoding is not used */
+	if (c == 0x98) {
+		filter->flag = 1;
+	}
 	return c;
 }
diff --git a/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h b/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h
index f504713805..662552ab3b 100644
--- a/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h
+++ b/ext/mbstring/libmbfl/filters/unicode_table_cp1251.h
@@ -30,7 +30,7 @@ static const unsigned short cp1251_ucs_table[] = {
 	0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021,
 	0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f,
 	0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
-	0xfffd, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
+	0x0000, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f,
 	0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7,
 	0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407,
 	0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7,
diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
index 3e15400879..f98015b572 100644
--- a/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
+++ b/ext/mbstring/libmbfl/mbfl/mbfl_consts.h
@@ -70,7 +70,6 @@
 #define MBFL_WCSPLANE_BIG5 0x70f40000 /* 2121h - 9898h */
 #define MBFL_WCSPLANE_CNS11643 0x70f50000 /* 2121h - 9898h */
 #define MBFL_WCSPLANE_UHC 0x70f60000 /* 8141h - fefeh */
-#define MBFL_WCSPLANE_CP1251 0x70f70000
 #define MBFL_WCSPLANE_CP866 0x70f80000
 #define MBFL_WCSPLANE_KOI8R 0x70f90000
 #define MBFL_WCSPLANE_8859_16 0x70fa0000 /* 00h - FFh */