]> granicus.if.org Git - php/commitdiff
json_encode: Escape U+2028 and U+2029 more often.
author Eddie Kohler <ekohler@gmail.com>
Tue, 29 Dec 2015 00:05:09 +0000 (19:05 -0500)
committer Jakub Zelenka <bukka@php.net>
Fri, 22 Jan 2016 19:40:58 +0000 (19:40 +0000)
U+2028 and U+2029 are treated as line terminators by JavaScript and are
not allowed unescaped inside its string literals, so leaving them
unescaped is risky. The default encoder ($flags = 0) already escapes
them, but the encoder with the JSON_UNESCAPED_UNICODE flag does not.

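In case anyone wants the ability to leave these characters unescaped,
provide JSON_UNESCAPED_LINE_TERMINATORS. The new flag only has an effect
when combined with JSON_UNESCAPED_UNICODE; on its own the characters are
still escaped.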

ext/json/json.c
ext/json/json_encoder.c
ext/json/php_json.h
ext/json/tests/json_encode_u2028_u2029.phpt [new file with mode: 0644]

index 20bbcdc59e8d789e1dc8756297a4d4963a5d52ed..971fe15f05aac19d76e9fbdae1bd646e7ca0043f 100644 (file)
@@ -117,6 +117,7 @@ static PHP_MINIT_FUNCTION(json)
        PHP_JSON_REGISTER_CONSTANT("JSON_UNESCAPED_UNICODE", PHP_JSON_UNESCAPED_UNICODE);
        PHP_JSON_REGISTER_CONSTANT("JSON_PARTIAL_OUTPUT_ON_ERROR", PHP_JSON_PARTIAL_OUTPUT_ON_ERROR);
        PHP_JSON_REGISTER_CONSTANT("JSON_PRESERVE_ZERO_FRACTION", PHP_JSON_PRESERVE_ZERO_FRACTION);
+       PHP_JSON_REGISTER_CONSTANT("JSON_UNESCAPED_LINE_TERMINATORS", PHP_JSON_UNESCAPED_LINE_TERMINATORS);
 
        /* options for json_decode */
        PHP_JSON_REGISTER_CONSTANT("JSON_OBJECT_AS_ARRAY", PHP_JSON_OBJECT_AS_ARRAY);
index 6c2f377034923b58039ae535e40ca4d161faab13..8da5abd0884289a09a654009efedee4d4dc3d9ee 100644 (file)
@@ -321,7 +321,7 @@ static void php_json_escape_string(smart_str *buf, char *s, size_t len, int opti
 
        do {
                us = (unsigned char)s[pos];
-               if (us >= 0x80 && !(options & PHP_JSON_UNESCAPED_UNICODE)) {
+               if (us >= 0x80 && (!(options & PHP_JSON_UNESCAPED_UNICODE) || us == 0xE2)) {
                        /* UTF-8 character */
                        us = php_next_utf8_char((const unsigned char *)s, len, &pos, &status);
                        if (status != SUCCESS) {
@@ -332,6 +332,15 @@ static void php_json_escape_string(smart_str *buf, char *s, size_t len, int opti
                                smart_str_appendl(buf, "null", 4);
                                return;
                        }
+                       /* Escape U+2028/U+2029 line terminators, UNLESS both
+                          JSON_UNESCAPED_UNICODE and
+                          JSON_UNESCAPED_LINE_TERMINATORS were provided */
+                       if ((options & PHP_JSON_UNESCAPED_UNICODE)
+                               && ((options & PHP_JSON_UNESCAPED_LINE_TERMINATORS)
+                                       || us < 0x2028 || us > 0x2029)) {
+                               smart_str_appendl(buf, &s[pos - 3], 3);
+                               continue;
+                       }
                        /* From http://en.wikipedia.org/wiki/UTF16 */
                        if (us >= 0x10000) {
                                unsigned int next_us;
index f1edc6c65a7df9c6e9f945605711e0883b433d47..d8bf0dfe9df41f6f17605dfcbd9a0a1abfff1e51 100644 (file)
@@ -67,6 +67,7 @@ typedef enum {
 #define PHP_JSON_UNESCAPED_UNICODE       (1<<8)
 #define PHP_JSON_PARTIAL_OUTPUT_ON_ERROR (1<<9)
 #define PHP_JSON_PRESERVE_ZERO_FRACTION  (1<<10)
+#define PHP_JSON_UNESCAPED_LINE_TERMINATORS (1<<11)
 
 /* json_decode() options */
 #define PHP_JSON_OBJECT_AS_ARRAY         (1<<0)
diff --git a/ext/json/tests/json_encode_u2028_u2029.phpt b/ext/json/tests/json_encode_u2028_u2029.phpt
new file mode 100644 (file)
index 0000000..4b87e9b
--- /dev/null
@@ -0,0 +1,36 @@
+--TEST--
+json_encode() tests for U+2028, U+2029
+--SKIPIF--
+<?php if (!extension_loaded("json")) print "skip"; ?>
+--FILE--
+<?php
+var_dump(json_encode(array("a\xC3\xA1b")));
+var_dump(json_encode(array("a\xC3\xA1b"), JSON_UNESCAPED_UNICODE));
+var_dump(json_encode("a\xE2\x80\xA7b"));
+var_dump(json_encode("a\xE2\x80\xA7b", JSON_UNESCAPED_UNICODE));
+var_dump(json_encode("a\xE2\x80\xA8b"));
+var_dump(json_encode("a\xE2\x80\xA8b", JSON_UNESCAPED_UNICODE));
+var_dump(json_encode("a\xE2\x80\xA8b", JSON_UNESCAPED_LINE_TERMINATORS));
+var_dump(json_encode("a\xE2\x80\xA8b", JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS));
+var_dump(json_encode("a\xE2\x80\xA9b"));
+var_dump(json_encode("a\xE2\x80\xA9b", JSON_UNESCAPED_UNICODE));
+var_dump(json_encode("a\xE2\x80\xA9b", JSON_UNESCAPED_LINE_TERMINATORS));
+var_dump(json_encode("a\xE2\x80\xA9b", JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_LINE_TERMINATORS));
+var_dump(json_encode("a\xE2\x80\xAAb"));
+var_dump(json_encode("a\xE2\x80\xAAb", JSON_UNESCAPED_UNICODE));
+?>
+--EXPECT--
+string(12) "["a\u00e1b"]"
+string(8) "["aáb"]"
+string(10) ""a\u2027b""
+string(7) ""a‧b""
+string(10) ""a\u2028b""
+string(10) ""a\u2028b""
+string(10) ""a\u2028b""
+string(7) ""a
b""
+string(10) ""a\u2029b""
+string(10) ""a\u2029b""
+string(10) ""a\u2029b""
+string(7) ""a
b""
+string(10) ""a\u202ab""
+string(7) ""a‪b""