From 077e61fad3c5fbdbe8df4371b88f64bcc4a6bccd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 23 Jul 2017 15:30:17 +0200 Subject: [PATCH] Fixed bug #69267 completely ucgendat.c was assuming that a title-case character is a character that has both lower and upper-case variants. However, there are title-case characters that only have a lower-case variant. Use the Lt general character property to determine where in the case map the character should be placed instead. --- NEWS | 1 + ext/mbstring/tests/bug69267.phpt | 17 ++++++++++ ext/mbstring/ucgendat/ucgendat.c | 14 ++++++-- ext/mbstring/unicode_data.h | 58 ++++++++++++++++---------------- 4 files changed, 59 insertions(+), 31 deletions(-) diff --git a/NEWS b/NEWS index 06bb8448bc..62f3f8010f 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,7 @@ PHP NEWS . Fixed bug #74954 (null deref and segfault in zend_generator_resume()). (Bob) - Mbstring: + . Fixed bug #69267 (mb_strtolower fails on titlecase characters). (Nikita) . Fixed bug #71606 (Segmentation fault mb_strcut with HTML-ENTITIES encoding). 
(cmb) diff --git a/ext/mbstring/tests/bug69267.phpt b/ext/mbstring/tests/bug69267.phpt index 8d429411ac..958f1c548b 100644 --- a/ext/mbstring/tests/bug69267.phpt +++ b/ext/mbstring/tests/bug69267.phpt @@ -2,6 +2,7 @@ Bug #69267: mb_strtolower fails on titlecase characters --FILE-- --EXPECT-- string(8) "džljnjdz" @@ -25,3 +36,9 @@ string(8) "DŽLJNJDZ" string(8) "Džljnjdz" string(8) "Džljnjdz" string(8) "Džljnjdz" +string(3) "ᾳ" +string(3) "ᾳ" +string(3) "ᾼ" +string(3) "ᾼ" +string(3) "ᾼ" +string(3) "ᾼ" diff --git a/ext/mbstring/ucgendat/ucgendat.c b/ext/mbstring/ucgendat/ucgendat.c index 80977c5310..42441082ef 100644 --- a/ext/mbstring/ucgendat/ucgendat.c +++ b/ext/mbstring/ucgendat/ucgendat.c @@ -539,6 +539,10 @@ add_title(ac_uint4 code) */ cases[2] = code; + /* If lower/upper case does not exist, stay the same */ + if (!cases[0]) cases[0] = code; + if (!cases[1]) cases[1] = code; + if (title_used == title_size) { if (title_size == 0) title = (_case_t *) malloc(sizeof(_case_t) << 3); @@ -825,7 +829,9 @@ read_cdata(FILE *in) lineno = skip = 0; while (fgets(line, sizeof(line), in)) { - if( (s=strchr(line, '\n')) ) *s = '\0'; + int is_title = 0; + + if( (s=strchr(line, '\n')) ) *s = '\0'; lineno++; /* @@ -968,6 +974,10 @@ read_cdata(FILE *in) ordered_range_insert(code, s, e - s); + if (e - s == 2 && s[0] == 'L' && s[1] == 't') { + is_title = 1; + } + /* * Locate the combining class code. */ @@ -1112,7 +1122,7 @@ read_cdata(FILE *in) if (*s == ';') s++; } - if (cases[0] && cases[1]) + if (is_title) /* * Add the upper and lower mappings for a title case character. 
*/ diff --git a/ext/mbstring/unicode_data.h b/ext/mbstring/unicode_data.h index 51b1ea65c9..c176128cf1 100644 --- a/ext/mbstring/unicode_data.h +++ b/ext/mbstring/unicode_data.h @@ -2469,7 +2469,7 @@ static const unsigned int _uccase_size = 2470; * LowerIndex = _uccase_len[0] * TitleIndex = LowerIndex + _uccase_len[1] */ -static const unsigned short _uccase_len[2] = {1229, 1237}; +static const unsigned short _uccase_len[2] = {1202, 1237}; static const unsigned int _uccase_map[] = { 0x00000041, 0x00000061, 0x00000041, @@ -3235,40 +3235,14 @@ static const unsigned int _uccase_map[] = { 0x00001f6d, 0x00001f65, 0x00001f6d, 0x00001f6e, 0x00001f66, 0x00001f6e, 0x00001f6f, 0x00001f67, 0x00001f6f, - 0x00001f88, 0x00001f80, 0x00001f88, - 0x00001f89, 0x00001f81, 0x00001f89, - 0x00001f8a, 0x00001f82, 0x00001f8a, - 0x00001f8b, 0x00001f83, 0x00001f8b, - 0x00001f8c, 0x00001f84, 0x00001f8c, - 0x00001f8d, 0x00001f85, 0x00001f8d, - 0x00001f8e, 0x00001f86, 0x00001f8e, - 0x00001f8f, 0x00001f87, 0x00001f8f, - 0x00001f98, 0x00001f90, 0x00001f98, - 0x00001f99, 0x00001f91, 0x00001f99, - 0x00001f9a, 0x00001f92, 0x00001f9a, - 0x00001f9b, 0x00001f93, 0x00001f9b, - 0x00001f9c, 0x00001f94, 0x00001f9c, - 0x00001f9d, 0x00001f95, 0x00001f9d, - 0x00001f9e, 0x00001f96, 0x00001f9e, - 0x00001f9f, 0x00001f97, 0x00001f9f, - 0x00001fa8, 0x00001fa0, 0x00001fa8, - 0x00001fa9, 0x00001fa1, 0x00001fa9, - 0x00001faa, 0x00001fa2, 0x00001faa, - 0x00001fab, 0x00001fa3, 0x00001fab, - 0x00001fac, 0x00001fa4, 0x00001fac, - 0x00001fad, 0x00001fa5, 0x00001fad, - 0x00001fae, 0x00001fa6, 0x00001fae, - 0x00001faf, 0x00001fa7, 0x00001faf, 0x00001fb8, 0x00001fb0, 0x00001fb8, 0x00001fb9, 0x00001fb1, 0x00001fb9, 0x00001fba, 0x00001f70, 0x00001fba, 0x00001fbb, 0x00001f71, 0x00001fbb, - 0x00001fbc, 0x00001fb3, 0x00001fbc, 0x00001fc8, 0x00001f72, 0x00001fc8, 0x00001fc9, 0x00001f73, 0x00001fc9, 0x00001fca, 0x00001f74, 0x00001fca, 0x00001fcb, 0x00001f75, 0x00001fcb, - 0x00001fcc, 0x00001fc3, 0x00001fcc, 0x00001fd8, 0x00001fd0, 
0x00001fd8, 0x00001fd9, 0x00001fd1, 0x00001fd9, 0x00001fda, 0x00001f76, 0x00001fda, @@ -3282,7 +3256,6 @@ static const unsigned int _uccase_map[] = { 0x00001ff9, 0x00001f79, 0x00001ff9, 0x00001ffa, 0x00001f7c, 0x00001ffa, 0x00001ffb, 0x00001f7d, 0x00001ffb, - 0x00001ffc, 0x00001ff3, 0x00001ffc, 0x00002126, 0x000003c9, 0x00002126, 0x0000212a, 0x0000006b, 0x0000212a, 0x0000212b, 0x000000e5, 0x0000212b, @@ -4941,6 +4914,33 @@ static const unsigned int _uccase_map[] = { 0x000001c5, 0x000001c4, 0x000001c6, 0x000001c8, 0x000001c7, 0x000001c9, 0x000001cb, 0x000001ca, 0x000001cc, - 0x000001f2, 0x000001f1, 0x000001f3 + 0x000001f2, 0x000001f1, 0x000001f3, + 0x00001f88, 0x00001f88, 0x00001f80, + 0x00001f89, 0x00001f89, 0x00001f81, + 0x00001f8a, 0x00001f8a, 0x00001f82, + 0x00001f8b, 0x00001f8b, 0x00001f83, + 0x00001f8c, 0x00001f8c, 0x00001f84, + 0x00001f8d, 0x00001f8d, 0x00001f85, + 0x00001f8e, 0x00001f8e, 0x00001f86, + 0x00001f8f, 0x00001f8f, 0x00001f87, + 0x00001f98, 0x00001f98, 0x00001f90, + 0x00001f99, 0x00001f99, 0x00001f91, + 0x00001f9a, 0x00001f9a, 0x00001f92, + 0x00001f9b, 0x00001f9b, 0x00001f93, + 0x00001f9c, 0x00001f9c, 0x00001f94, + 0x00001f9d, 0x00001f9d, 0x00001f95, + 0x00001f9e, 0x00001f9e, 0x00001f96, + 0x00001f9f, 0x00001f9f, 0x00001f97, + 0x00001fa8, 0x00001fa8, 0x00001fa0, + 0x00001fa9, 0x00001fa9, 0x00001fa1, + 0x00001faa, 0x00001faa, 0x00001fa2, + 0x00001fab, 0x00001fab, 0x00001fa3, + 0x00001fac, 0x00001fac, 0x00001fa4, + 0x00001fad, 0x00001fad, 0x00001fa5, + 0x00001fae, 0x00001fae, 0x00001fa6, + 0x00001faf, 0x00001faf, 0x00001fa7, + 0x00001fbc, 0x00001fbc, 0x00001fb3, + 0x00001fcc, 0x00001fcc, 0x00001fc3, + 0x00001ffc, 0x00001ffc, 0x00001ff3 }; -- 2.40.0