From 22a07344691222d4a687eba82a0e380c3be3267a Mon Sep 17 00:00:00 2001 From: Moriyoshi Koizumi Date: Thu, 31 Jul 2008 17:39:14 +0000 Subject: [PATCH] - MFH: Fixed bug #44617 (wrong HTML entity output when substitute_character=entity). --- NEWS | 2 + ext/mbstring/libmbfl/mbfl/mbfl_convert.c | 60 +++++++++++-------- .../tests/mb_substitute_character.phpt | 56 ++++++++--------- 3 files changed, 62 insertions(+), 56 deletions(-) diff --git a/NEWS b/NEWS index 7ec63d309f..9c539ddde5 100644 --- a/NEWS +++ b/NEWS @@ -52,6 +52,8 @@ PHP NEWS - Fixed bug #44716 (Progress notifications incorrect). (Hannes) - Fixed bug #44712 (stream_context_set_params segfaults on invalid arguments). (Hannes) +- Fixed bug #44617 (wrong HTML entity output when substitute_character=entity). + (Moriyoshi) - Fixed bug #44246 (closedir() accepts a file resource opened by fopen()). (Dmitry, Tony) - Fixed bug #44127 (UNIX abstract namespace socket connect does not work). diff --git a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c index f6810738f9..8c3a9758d7 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfl_convert.c +++ b/ext/mbstring/libmbfl/mbfl/mbfl_convert.c @@ -357,22 +357,6 @@ int mbfl_convert_filter_strcat(mbfl_convert_filter *filter, const unsigned char return 0; } -#if 0 -static int -mbfl_convert_filter_strncat(mbfl_convert_filter *filter, const unsigned char *p, - int n) -{ - while (n > 0) { - if ((*filter->filter_function)(*p++, filter) < 0) { - return -1; - } - n--; - } - - return n; -} -#endif - /* illegal character output function for conv-filter */ int mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) @@ -387,14 +371,9 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) ret = (*filter->filter_function)(filter->illegal_substchar, filter); break; case MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG: - case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: if (c >= 0) { if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ - if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) { - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); - } else { /* entity */ - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#"); - } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"U+"); } else { if (c < MBFL_WCSGROUP_WCHARMAX) { m = c & ~MBFL_WCSPLANE_MASK; @@ -438,9 +417,38 @@ mbfl_filt_conv_illegal_output(int c, mbfl_convert_filter *filter) if (m == 0 && ret >= 0) { ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); } - if (mode_backup == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) { - ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } + } + break; + case MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY: + if (c >= 0) { + if (c < MBFL_WCSGROUP_UCS4MAX) { /* unicode */ + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)"&#x"); + if (ret < 0) + break; + + m = 0; + r = 28; + while (r >= 0) { + n = (c >> r) & 0xf; + if (n || m) { + m = 1; + ret = (*filter->filter_function)(mbfl_hexchar_table[n], filter); + if (ret < 0) { + break; + } + } + r -= 4; + } + if (ret < 0) { + break; + } + if (m == 0) { + ret = (*filter->filter_function)(mbfl_hexchar_table[0], filter); } + ret = mbfl_convert_filter_strcat(filter, (const unsigned char *)";"); + } else { + ret = (*filter->filter_function)(filter->illegal_substchar, filter); } } break; @@ -462,8 +470,8 @@ const struct mbfl_convert_vtbl * mbfl_convert_filter_get_vtbl(enum mbfl_no_encod to == mbfl_no_encoding_7bit) { from = mbfl_no_encoding_8bit; } else if (from == mbfl_no_encoding_base64 || - from == mbfl_no_encoding_qprint || - from == mbfl_no_encoding_uuencode) { + from == mbfl_no_encoding_qprint || + from == mbfl_no_encoding_uuencode) { to = mbfl_no_encoding_8bit; } diff --git a/ext/mbstring/tests/mb_substitute_character.phpt b/ext/mbstring/tests/mb_substitute_character.phpt index 9f2131080e..9f35f1d266 100644 --- a/ext/mbstring/tests/mb_substitute_character.phpt +++ b/ext/mbstring/tests/mb_substitute_character.phpt @@ -10,40 +10,36 @@ include_once('common.inc'); // Note: It does not return TRUE/FALSE for setting char -// Use Unicode val -$r = mb_substitute_character(0x3013); -//$r = mb_substitute_character('U+3013'); -($r === TRUE) ? print "OK_UTF\n" : print("NG_UTF: ".gettype($r)." $r\n"); -print mb_substitute_character() . "\n"; +var_dump(mb_substitute_character(0x3044)); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); +var_dump(mb_substitute_character('long')); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); -// Use "long" -$r = mb_substitute_character('long'); -($r === TRUE) ? print "OK_LONG\n" : print("NG_LONG: ".gettype($r)." $r\n"); -print mb_substitute_character() . "\n"; +var_dump(mb_substitute_character('none')); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); +var_dump(mb_substitute_character('entity')); +var_dump(mb_substitute_character()); +var_dump(bin2hex(mb_convert_encoding("\xe2\x99\xa0\xe3\x81\x82", "CP932", "UTF-8"))); -// Use "none" -$r = mb_substitute_character('none'); -($r === TRUE) ? print "OK_NONE\n" : print("NG_NONE: ".gettype($r)." $r\n"); -print mb_substitute_character() . "\n"; - - -// Set invalid string. Should fail. -print "== INVALID PARAMETER ==\n"; -$r = mb_substitute_character('BAD_NAME'); -($r === FALSE) ? print "OK_BAD_NAME\n" : print("NG_BAD_NAME: ".gettype($r)." $r\n"); - +var_dump(mb_substitute_character('BAD_NAME')); ?> - --EXPECT-- -OK_UTF -12307 -OK_LONG -long -OK_NONE -none -== INVALID PARAMETER == +bool(true) +int(12356) +string(8) "82a282a0" +bool(true) +string(4) "long" +string(16) "552b3236363082a0" +bool(true) +string(4) "none" +string(4) "82a0" +bool(true) +string(6) "entity" +string(20) "262378323636303b82a0" ERR: Warning -OK_BAD_NAME - +bool(false) -- 2.40.0