granicus.if.org Git - php/commitdiff
Drop multibyte character support in html_entity_decode().
author: Moriyoshi Koizumi <moriyoshi@php.net>
Thu, 2 Oct 2003 19:06:44 +0000 (19:06 +0000)
committer: Moriyoshi Koizumi <moriyoshi@php.net>
Thu, 2 Oct 2003 19:06:44 +0000 (19:06 +0000)
# This has never worked as expected.

ext/standard/html.c

index 942d862c41f23300dcdceb11020e893859c66c2b..3d4851baaedc75611cf4a7fcce4df63f1d25fa56 100644 (file)
@@ -924,7 +924,10 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *new
                if (p[0] == '&') {
                        if (p + 2 < lim) {
                                if (p[1] == '#') {
+                                       int invalid_code = 0;
+
                                        code = strtol(p + 2, &next, 10);
+
                                        if (next != NULL && *next == ';') {
                                                switch (charset) {
                                                        case cs_utf_8:
@@ -934,7 +937,9 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *new
                                                        case cs_8859_1:
                                                        case cs_8859_5:
                                                        case cs_8859_15:
-                                                               if (0xa0 <= code && code <= 0xff) {
+                                                               if ((code >= 0x80 && code < 0xa0) || code > 0xff) {
+                                                                       invalid_code = 1;
+                                                               } else {
                                                                        *(q++) = code;
                                                                }
                                                                break;
@@ -942,24 +947,42 @@ PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *new
                                                        case cs_cp1252:
                                                        case cs_cp1251:
                                                        case cs_cp866:
-                                                               if (0x80 <= code && code <= 0xff) {
+                                                               if (code > 0xff) {
+                                                                       invalid_code = 1;
+                                                               } else {
                                                                        *(q++) = code;
                                                                }
                                                                break;
 
                                                        case cs_big5:
-                                                       case cs_gb2312:
                                                        case cs_big5hkscs:
                                                        case cs_sjis:
                                                        case cs_eucjp:
-                                                               if (code <= 0x7f) {
+                                                               if (code >= 0x80) {
+                                                                       invalid_code = 1;
+                                                               } else {
+                                                                       *(q++) = code;
+                                                               }
+                                                               break;
+
+                                                       case cs_gb2312:
+                                                               if (code >= 0x81) {
+                                                                       invalid_code = 1;
+                                                               } else {
                                                                        *(q++) = code;
                                                                }
                                                                break;
 
                                                        default:
+                                                               /* for backwards compatilibity */
+                                                               invalid_code = 1;
                                                                break;
                                                }
+                                               if (invalid_code) {
+                                                       for (; p <= next; p++) {
+                                                               *(q++) = *p;
+                                                       }
+                                               }
                                                p = next + 1;
                                        } else {
                                                *(q++) = *(p++);