granicus.if.org Git - php/commitdiff
- Bug #49785: take 5. What the hell happened to me...
author Moriyoshi Koizumi <moriyoshi@php.net>
Tue, 13 Oct 2009 05:18:37 +0000 (05:18 +0000)
committer Moriyoshi Koizumi <moriyoshi@php.net>
Tue, 13 Oct 2009 05:18:37 +0000 (05:18 +0000)
ext/standard/html.c
ext/standard/tests/strings/bug49785.phpt

index 44ca59b16c7d4c14cad980d466891794b54d08d4..14b4f2cabad614d6bbb0113091673c7a1637cf96 100644 (file)
@@ -539,19 +539,18 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                c = str[pos];
                                if (c < 0x80) {
                                        MB_WRITE(c);
-                    this_char = c;
+                                       this_char = c;
                                        pos++;
                                } else if (c < 0xc0) {
                                        MB_FAILURE(pos);
                                } else if (c < 0xe0) {
                                        CHECK_LEN(pos, 2);
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
                                        if (this_char < 0x80) {
-                                               *status = FAILURE;
-                                               return 0;
+                                               MB_FAILURE(pos);
                                        }
                                        MB_WRITE((unsigned char)c);
                                        MB_WRITE((unsigned char)str[pos + 1]);
@@ -559,14 +558,14 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                } else if (c < 0xf0) {
                                        CHECK_LEN(pos, 3);
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
                                        if (this_char < 0x800) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        MB_WRITE((unsigned char)c);
                                        MB_WRITE((unsigned char)str[pos + 1]);
@@ -575,17 +574,17 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                } else if (c < 0xf8) {
                                        CHECK_LEN(pos, 4);
                                        if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
                                        if (this_char < 0x10000) {
-                        MB_FAILURE(pos);
+                                               MB_FAILURE(pos);
                                        }
                                        MB_WRITE((unsigned char)c);
                                        MB_WRITE((unsigned char)str[pos + 1]);
@@ -593,7 +592,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                        MB_WRITE((unsigned char)str[pos + 3]);
                                        pos += 4;
                                } else {
-                    MB_FAILURE(pos);
+                                       MB_FAILURE(pos);
                                }
                        }
                        break;
index f344855931cb672074d4957e8d0dde57813829fd..7b93c8c7168fa65d8030ddc4236293183bcab8b2 100644 (file)
@@ -35,6 +35,12 @@ var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
 var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
 var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
 
+echo "--\n";
+// UTF-8: with ENT_IGNORE
+var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+
 echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
 var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
@@ -155,6 +161,10 @@ string(0) ""
 string(0) ""
 string(0) ""
 --
+string(4) "c280"
+string(6) "e0a080"
+string(8) "f0908080"
+--
 string(0) ""
 string(0) ""
 string(0) ""