]> granicus.if.org Git - php/commitdiff
- A couple more fixes for my previous fix.
author: Moriyoshi Koizumi <moriyoshi@php.net>
Sun, 11 Oct 2009 23:52:33 +0000 (23:52 +0000)
committer: Moriyoshi Koizumi <moriyoshi@php.net>
Sun, 11 Oct 2009 23:52:33 +0000 (23:52 +0000)
  (one of the fixes is by Arnaud Le Blanc. Thanks!)

ext/standard/html.c
ext/standard/tests/strings/bug49785.phpt
ext/standard/tests/strings/htmlentities-utf-2.phpt
ext/standard/tests/strings/htmlentities-utf.phpt

index 3ca73cce437ca9c5b841610cc2a9f2c17aed0431..bfd49465577042c8120ad2353c10aab761ad4a82 100644 (file)
@@ -489,11 +489,26 @@ struct basic_entities_dec {
                        }                        \
                        mbseq[mbpos++] = (mbchar); }
 
+/* skip one byte and return */
+#define MB_FAILURE(pos) do { \
+       *newpos = pos + 1; \
+       *status = FAILURE; \
+       return 0; \
+} while (0)
+
 #define CHECK_LEN(pos, chars_need)                     \
-       if((str_len - (pos)) < chars_need) {    \
-               *newpos = pos;                                          \
-               *status = FAILURE;                                      \
-               return 0;                                                       \
+       if (chars_need < 1) {                                           \
+               if((str_len - (pos)) < chars_need) {    \
+                       *newpos = pos;                                          \
+                       *status = FAILURE;                                      \
+                       return 0;                                                       \
+               }                                                                               \
+       } else {                                                                        \
+               if((str_len - (pos)) < chars_need) {    \
+                       *newpos = pos + 1;                                      \
+                       *status = FAILURE;                                      \
+                       return 0;                                                       \
+               }                                                                               \
        }
 
 /* {{{ get_next_char
@@ -532,10 +547,12 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                     this_char = c;
                                        pos++;
                                } else if (c < 0xc0) {
-                                       *status = FAILURE;
-                                       return 0;
+                                       MB_FAILURE(pos);
                                } else if (c < 0xe0) {
                                        CHECK_LEN(pos, 2);
+                                       if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+                                       }
                                        this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
                                        if (this_char < 0x80) {
                                                *status = FAILURE;
@@ -546,10 +563,15 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                        pos += 2;
                                } else if (c < 0xf0) {
                                        CHECK_LEN(pos, 3);
+                                       if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+                                       }
+                                       if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+                                       }
                                        this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
                                        if (this_char < 0x800) {
-                                               *status = FAILURE;
-                                               return 0;
+                        MB_FAILURE(pos);
                                        }
                                        MB_WRITE((unsigned char)c);
                                        MB_WRITE((unsigned char)str[pos + 1]);
@@ -557,10 +579,18 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                        pos += 3;
                                } else if (c < 0xf8) {
                                        CHECK_LEN(pos, 4);
+                                       if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
+                        MB_FAILURE(pos);
+                                       }
+                                       if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
+                        MB_FAILURE(pos);
+                                       }
+                                       if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
+                        MB_FAILURE(pos);
+                                       }
                                        this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
                                        if (this_char < 0x10000) {
-                                               *status = FAILURE;
-                                               return 0;
+                        MB_FAILURE(pos);
                                        }
                                        MB_WRITE((unsigned char)c);
                                        MB_WRITE((unsigned char)str[pos + 1]);
@@ -568,8 +598,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                        MB_WRITE((unsigned char)str[pos + 3]);
                                        pos += 4;
                                } else {
-                                       *status = FAILURE;
-                                       return 0;
+                    MB_FAILURE(pos);
                                }
                        }
                        break;
@@ -591,8 +620,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                                MB_WRITE(next_char);
                                                this_char = (this_char << 8) | next_char;
                                        } else {
-                                               *status = FAILURE;
-                                               return 0;
+                                               MB_FAILURE(pos);
                                        }
                                } else {
                                        MB_WRITE(this_char);
@@ -617,8 +645,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                                MB_WRITE(next_char);
                                                this_char = (this_char << 8) | next_char;
                                        } else {
-                                               *status = FAILURE;
-                                               return 0;
+                                               MB_FAILURE(pos);
                                        }
                                } else {
                                        MB_WRITE(this_char);
@@ -640,8 +667,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                                MB_WRITE(next_char);
                                                this_char = (this_char << 8) | next_char;
                                        } else {
-                                               *status = FAILURE;
-                                               return 0;
+                                               MB_FAILURE(pos);
                                        }
                                } else if (this_char == 0x8e) {
                                        /* peek at the next char */
@@ -653,8 +679,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                                MB_WRITE(next_char);
                                                this_char = (this_char << 8) | next_char;
                                        } else {
-                                               *status = FAILURE;
-                                               return 0;
+                                               MB_FAILURE(pos);
                                        }
                                } else if (this_char == 0x8f) {
                                        /* peek at the next two char */
@@ -671,8 +696,7 @@ inline static unsigned int get_next_char(enum entity_charset charset,
                                                MB_WRITE(next2_char);
                                                this_char = (this_char << 16) | (next_char << 8) | next_char;
                                        } else {
-                                               *status = FAILURE;
-                                               return 0;
+                                               MB_FAILURE(pos);
                                        }
                                } else {
                                        MB_WRITE(this_char);
index eb4ad8b743380c06c9b35e2075499387eb99cb48..f344855931cb672074d4957e8d0dde57813829fd 100644 (file)
@@ -7,19 +7,33 @@ function _bin2hex($val) {
 }
 
 // UTF-8: basic tests
-var_dump(bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
-var_dump(bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc1\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\x00", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xc2\xc0", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\x91", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xce\xb1", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xdf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\xa0\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x9f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x1f\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe2\x99\xa5", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xef\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x8f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x90\x80\x80", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\x3f\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\x3f\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\x3f", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xff\xbf\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xff\xbf", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf7\xbf\xbf\xff", ENT_QUOTES, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));
 
 echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
@@ -115,17 +129,31 @@ foreach (array_map('chr', range(0x81, 0xfe)) as $c) {
 --EXPECT--
 string(0) ""
 string(4) "c280"
+string(0) ""
+string(0) ""
 string(14) "26416c7068613b"
 string(14) "26616c7068613b"
 string(4) "dfbf"
 string(6) "e0a080"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 string(16) "266865617274733b"
 string(6) "efbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
 string(8) "f0908080"
 string(8) "f7bfbfbf"
 string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
+string(0) ""
 --
 string(0) ""
 string(0) ""
index a80100cb10e8d550b699caaeb73aec8fd9af502a..c5f4ac4ea6dbf4705cf9984946fff83a4f8c739c 100755 (executable)
@@ -36,8 +36,8 @@ foreach($strings as $string) {
 %unicode|string%(0) ""
 %unicode|string%(2) "79"
 %unicode|string%(2) "79"
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(8) "566f696c"
 %unicode|string%(8) "566f696c"
 %unicode|string%(12) "436c69636873"
@@ -52,10 +52,10 @@ foreach($strings as $string) {
 %unicode|string%(2) "79"
 %unicode|string%(8) "f7bfbfbf"
 %unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(4) "4142"
 %unicode|string%(4) "4142"
 %unicode|string%(4) "4242"
index b85803a16324b67c1096f23dca175c57876151f3..1daafc61d82b1477c146a78b57fd53c779eb375a 100755 (executable)
@@ -36,8 +36,8 @@ foreach($strings as $string) {
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
-%unicode|string%(8) "2667743b"
-%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
@@ -52,10 +52,10 @@ foreach($strings as $string) {
 %unicode|string%(0) ""
 %unicode|string%(8) "f7bfbfbf"
 %unicode|string%(8) "f7bfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(10) "fbbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
-%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""
 %unicode|string%(0) ""