]> granicus.if.org Git - php/commitdiff
Fixed bug #69267 completely
author: Nikita Popov <nikita.ppv@gmail.com>
Sun, 23 Jul 2017 13:30:17 +0000 (15:30 +0200)
committer: Nikita Popov <nikita.ppv@gmail.com>
Sun, 23 Jul 2017 13:30:17 +0000 (15:30 +0200)
ucgendat.c was assuming that a title-case character is a character
that has both lower and upper-case variants. However, there are
title-case characters that only have a lower-case variant. Use the
Lt general character property to determine where in the case map
the character should be placed instead.

NEWS
ext/mbstring/tests/bug69267.phpt
ext/mbstring/ucgendat/ucgendat.c
ext/mbstring/unicode_data.h

diff --git a/NEWS b/NEWS
index 06bb8448bc25e81ff06637d3f145591bda65fef9..62f3f8010f9e0ae923f1e6fd8e5a4aade41b3f91 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ PHP                                                                        NEWS
   . Fixed bug #74954 (null deref and segfault in zend_generator_resume()). (Bob)
 
 - Mbstring:
+  . Fixed bug #69267 (mb_strtolower fails on titlecase characters). (Nikita)
   . Fixed bug #71606 (Segmentation fault mb_strcut with HTML-ENTITIES encoding).
     (cmb)
 
index 8d429411ac57a295468598ae660ba59d360da3dc..958f1c548b930ed50fd80ccc6f1b3a445b65dc00 100644 (file)
@@ -2,6 +2,7 @@
 Bug #69267: mb_strtolower fails on titlecase characters
 --FILE--
 <?php
+
 $str_l = "džljnjdz";
 $str_u = "DŽLJNJDZ";
 $str_t = "DžLjNjDz";
@@ -14,6 +15,16 @@ var_dump(mb_strtoupper($str_t));
 var_dump(mb_convert_case($str_l, MB_CASE_TITLE));
 var_dump(mb_convert_case($str_u, MB_CASE_TITLE));
 var_dump(mb_convert_case($str_t, MB_CASE_TITLE));
+
+$str_l = "ᾳ";
+$str_t = "ᾼ";
+var_dump(mb_strtolower($str_l));
+var_dump(mb_strtolower($str_t));
+var_dump(mb_strtoupper($str_l));
+var_dump(mb_strtoupper($str_t));
+var_dump(mb_convert_case($str_l, MB_CASE_TITLE));
+var_dump(mb_convert_case($str_t, MB_CASE_TITLE));
+
 ?>
 --EXPECT--
 string(8) "džljnjdz"
@@ -25,3 +36,9 @@ string(8) "DŽLJNJDZ"
 string(8) "Džljnjdz"
 string(8) "Džljnjdz"
 string(8) "Džljnjdz"
+string(3) "ᾳ"
+string(3) "ᾳ"
+string(3) "ᾼ"
+string(3) "ᾼ"
+string(3) "ᾼ"
+string(3) "ᾼ"
index 80977c5310b2ccdb2c0013590ec594141151e43f..42441082ef398dd88ccf7415fe93cfa27dd0ded4 100644 (file)
@@ -539,6 +539,10 @@ add_title(ac_uint4 code)
      */
     cases[2] = code;
 
+    /* If lower/upper case does not exist, stay the same */
+    if (!cases[0]) cases[0] = code;
+    if (!cases[1]) cases[1] = code;
+
     if (title_used == title_size) {
         if (title_size == 0)
           title = (_case_t *) malloc(sizeof(_case_t) << 3);
@@ -825,7 +829,9 @@ read_cdata(FILE *in)
 
     lineno = skip = 0;
     while (fgets(line, sizeof(line), in)) {
-       if( (s=strchr(line, '\n')) ) *s = '\0';
+        int is_title = 0;
+
+        if( (s=strchr(line, '\n')) ) *s = '\0';
         lineno++;
 
         /*
@@ -968,6 +974,10 @@ read_cdata(FILE *in)
 
         ordered_range_insert(code, s, e - s);
 
+        if (e - s == 2 && s[0] == 'L' && s[1] == 't') {
+            is_title = 1;
+        }
+
         /*
          * Locate the combining class code.
          */
@@ -1112,7 +1122,7 @@ read_cdata(FILE *in)
             if (*s == ';')
               s++;
         }
-        if (cases[0] && cases[1])
+        if (is_title)
           /*
            * Add the upper and lower mappings for a title case character.
            */
index 51b1ea65c99a5b08603d64bbef290a73cbdf799a..c176128cf14213fe317939088af4f373ce8ca8cb 100644 (file)
@@ -2469,7 +2469,7 @@ static const unsigned int _uccase_size = 2470;
  * LowerIndex = _uccase_len[0]
  * TitleIndex = LowerIndex + _uccase_len[1] */
 
-static const unsigned short _uccase_len[2] = {1229, 1237};
+static const unsigned short _uccase_len[2] = {1202, 1237};
 
 static const unsigned int _uccase_map[] = {
        0x00000041, 0x00000061, 0x00000041,
@@ -3235,40 +3235,14 @@ static const unsigned int _uccase_map[] = {
        0x00001f6d, 0x00001f65, 0x00001f6d,
        0x00001f6e, 0x00001f66, 0x00001f6e,
        0x00001f6f, 0x00001f67, 0x00001f6f,
-       0x00001f88, 0x00001f80, 0x00001f88,
-       0x00001f89, 0x00001f81, 0x00001f89,
-       0x00001f8a, 0x00001f82, 0x00001f8a,
-       0x00001f8b, 0x00001f83, 0x00001f8b,
-       0x00001f8c, 0x00001f84, 0x00001f8c,
-       0x00001f8d, 0x00001f85, 0x00001f8d,
-       0x00001f8e, 0x00001f86, 0x00001f8e,
-       0x00001f8f, 0x00001f87, 0x00001f8f,
-       0x00001f98, 0x00001f90, 0x00001f98,
-       0x00001f99, 0x00001f91, 0x00001f99,
-       0x00001f9a, 0x00001f92, 0x00001f9a,
-       0x00001f9b, 0x00001f93, 0x00001f9b,
-       0x00001f9c, 0x00001f94, 0x00001f9c,
-       0x00001f9d, 0x00001f95, 0x00001f9d,
-       0x00001f9e, 0x00001f96, 0x00001f9e,
-       0x00001f9f, 0x00001f97, 0x00001f9f,
-       0x00001fa8, 0x00001fa0, 0x00001fa8,
-       0x00001fa9, 0x00001fa1, 0x00001fa9,
-       0x00001faa, 0x00001fa2, 0x00001faa,
-       0x00001fab, 0x00001fa3, 0x00001fab,
-       0x00001fac, 0x00001fa4, 0x00001fac,
-       0x00001fad, 0x00001fa5, 0x00001fad,
-       0x00001fae, 0x00001fa6, 0x00001fae,
-       0x00001faf, 0x00001fa7, 0x00001faf,
        0x00001fb8, 0x00001fb0, 0x00001fb8,
        0x00001fb9, 0x00001fb1, 0x00001fb9,
        0x00001fba, 0x00001f70, 0x00001fba,
        0x00001fbb, 0x00001f71, 0x00001fbb,
-       0x00001fbc, 0x00001fb3, 0x00001fbc,
        0x00001fc8, 0x00001f72, 0x00001fc8,
        0x00001fc9, 0x00001f73, 0x00001fc9,
        0x00001fca, 0x00001f74, 0x00001fca,
        0x00001fcb, 0x00001f75, 0x00001fcb,
-       0x00001fcc, 0x00001fc3, 0x00001fcc,
        0x00001fd8, 0x00001fd0, 0x00001fd8,
        0x00001fd9, 0x00001fd1, 0x00001fd9,
        0x00001fda, 0x00001f76, 0x00001fda,
@@ -3282,7 +3256,6 @@ static const unsigned int _uccase_map[] = {
        0x00001ff9, 0x00001f79, 0x00001ff9,
        0x00001ffa, 0x00001f7c, 0x00001ffa,
        0x00001ffb, 0x00001f7d, 0x00001ffb,
-       0x00001ffc, 0x00001ff3, 0x00001ffc,
        0x00002126, 0x000003c9, 0x00002126,
        0x0000212a, 0x0000006b, 0x0000212a,
        0x0000212b, 0x000000e5, 0x0000212b,
@@ -4941,6 +4914,33 @@ static const unsigned int _uccase_map[] = {
        0x000001c5, 0x000001c4, 0x000001c6,
        0x000001c8, 0x000001c7, 0x000001c9,
        0x000001cb, 0x000001ca, 0x000001cc,
-       0x000001f2, 0x000001f1, 0x000001f3
+       0x000001f2, 0x000001f1, 0x000001f3,
+       0x00001f88, 0x00001f88, 0x00001f80,
+       0x00001f89, 0x00001f89, 0x00001f81,
+       0x00001f8a, 0x00001f8a, 0x00001f82,
+       0x00001f8b, 0x00001f8b, 0x00001f83,
+       0x00001f8c, 0x00001f8c, 0x00001f84,
+       0x00001f8d, 0x00001f8d, 0x00001f85,
+       0x00001f8e, 0x00001f8e, 0x00001f86,
+       0x00001f8f, 0x00001f8f, 0x00001f87,
+       0x00001f98, 0x00001f98, 0x00001f90,
+       0x00001f99, 0x00001f99, 0x00001f91,
+       0x00001f9a, 0x00001f9a, 0x00001f92,
+       0x00001f9b, 0x00001f9b, 0x00001f93,
+       0x00001f9c, 0x00001f9c, 0x00001f94,
+       0x00001f9d, 0x00001f9d, 0x00001f95,
+       0x00001f9e, 0x00001f9e, 0x00001f96,
+       0x00001f9f, 0x00001f9f, 0x00001f97,
+       0x00001fa8, 0x00001fa8, 0x00001fa0,
+       0x00001fa9, 0x00001fa9, 0x00001fa1,
+       0x00001faa, 0x00001faa, 0x00001fa2,
+       0x00001fab, 0x00001fab, 0x00001fa3,
+       0x00001fac, 0x00001fac, 0x00001fa4,
+       0x00001fad, 0x00001fad, 0x00001fa5,
+       0x00001fae, 0x00001fae, 0x00001fa6,
+       0x00001faf, 0x00001faf, 0x00001fa7,
+       0x00001fbc, 0x00001fbc, 0x00001fb3,
+       0x00001fcc, 0x00001fcc, 0x00001fc3,
+       0x00001ffc, 0x00001ffc, 0x00001ff3
 };