granicus.if.org Git - php/commitdiff
- Fixed bug #44617 (wrong HTML entity output when substitute_character=entity)
author Moriyoshi Koizumi <moriyoshi@php.net>
Thu, 31 Jul 2008 17:36:35 +0000 (17:36 +0000)
committer Moriyoshi Koizumi <moriyoshi@php.net>
Thu, 31 Jul 2008 17:36:35 +0000 (17:36 +0000)
ext/mbstring/libmbfl/mbfl/mbfl_convert.c
ext/mbstring/tests/mb_substitute_character.phpt

index 42ee2e47d599dbb8d8732d591d3eed0e26991080..2d06bbd316afd228ff0eccb7bc6819495f14aad8 100644 (file)
@@ -356,22 +356,6 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char
        return 0;
 }
 
-#if 0
-static int
-mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, 
-                           int n)
-{
-       while (n > 0) {
-               if ((*filter->filter_function)(*p++, filter) < 0) {
-                       return -1;
-               }
-               n--;
-       }
-
-       return n;
-}
-#endif
-
 /* illegal character output function for conv-filter */
 int
 mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
@@ -386,14 +370,9 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
                ret = (*filter->filter_function)(filter->illegal_substchar, filter);
                break;
        case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG:
-       case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
                if (c >= 0) {
                        if (c < MBFL_WCSGROUP_UCS4MAX) {        /* unicode */
-                         if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
-                           ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
-                         } else { /* entity */
-                           ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#");
-                         }
+                               ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+");
                        } else {
                                if (c < MBFL_WCSGROUP_WCHARMAX) {
                                        m = c & ~MBFL_WCSPLANE_MASK;
@@ -437,9 +416,38 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter)
                                if (m == 0 && ret >= 0) {
                                        ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
                                }
-                               if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
-                                 ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
+                       }
+               }
+               break;
+       case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY:
+               if (c >= 0) {
+                       if (c < MBFL_WCSGROUP_UCS4MAX) {        /* unicode */
+                               ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x");
+                               if (ret < 0)
+                                       break;
+
+                               m = 0;
+                               r = 28;
+                               while (r >= 0) {
+                                       n = (c >> r) & 0xf;
+                                       if (n || m) {
+                                               m = 1;
+                                               ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter);
+                                               if (ret < 0) {
+                                                       break;
+                                               }
+                                       }
+                                       r -= 4;
+                               }
+                               if (ret < 0) {
+                                       break;
+                               }
+                               if (m == 0) {
+                                       ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter);
                                }
+                               ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";");
+                       } else {
+                               ret = (*filter->filter_function)(filter->illegal_substchar, filter);
                        }
                }
                break;
@@ -461,8 +469,8 @@ const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encod
            to == mbfl_no_encoding_7bit) {
                from = mbfl_no_encoding_8bit;
        } else if (from == mbfl_no_encoding_base64 ||
-                  from == mbfl_no_encoding_qprint ||
-                                 from == mbfl_no_encoding_uuencode) {
+                          from == mbfl_no_encoding_qprint ||
+                          from == mbfl_no_encoding_uuencode) {
                to = mbfl_no_encoding_8bit;
        }
 
index 9f2131080ede382be2f0233bd07ae48d8a172678..9f35f1d2662f548c53bd5b1b1b2de9bb6ad5ff07 100644 (file)
@@ -10,40 +10,36 @@ include_once('common.inc');
 
 // Note: It does not return TRUE/FALSE for setting char
 
-// Use Unicode val
-$r = mb_substitute_character(0x3013);
-//$r = mb_substitute_character('U+3013');
-($r === TRUE) ?    print "OK_UTF\n" :  print("NG_UTF: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
+var_dump(mb_substitute_character(0x3044));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
 
+var_dump(mb_substitute_character('long'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
 
-// Use "long"
-$r = mb_substitute_character('long');
-($r === TRUE) ? print "OK_LONG\n" : print("NG_LONG: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
+var_dump(mb_substitute_character('none'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
 
+var_dump(mb_substitute_character('entity'));
+var_dump(mb_substitute_character());
+var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8")));
 
-// Use "none"
-$r = mb_substitute_character('none');
-($r === TRUE) ? print "OK_NONE\n" : print("NG_NONE: ".gettype($r)." $r\n");
-print mb_substitute_character() . "\n";
-
-
-// Set invalid string. Should fail.
-print "== INVALID PARAMETER ==\n";
-$r = mb_substitute_character('BAD_NAME');
-($r === FALSE) ? print "OK_BAD_NAME\n" : print("NG_BAD_NAME: ".gettype($r)." $r\n");
-
+var_dump(mb_substitute_character('BAD_NAME'));
 ?>
-
 --EXPECT--
-OK_UTF
-12307
-OK_LONG
-long
-OK_NONE
-none
-== INVALID PARAMETER ==
+bool(true)
+int(12356)
+string(8) "82a282a0"
+bool(true)
+string(4) "long"
+string(16) "552b3236363082a0"
+bool(true)
+string(4) "none"
+string(4) "82a0"
+bool(true)
+string(6) "entity"
+string(20) "262378323636303b82a0"
 ERR: Warning
-OK_BAD_NAME
-
+bool(false)