Fix jsonb Unicode escape processing, and in consequence disallow \u0000.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 30 Jan 2015 19:44:46 +0000 (14:44 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 30 Jan 2015 19:44:56 +0000 (14:44 -0500)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 30 Jan 2015 19:44:46 +0000 (14:44 -0500)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 30 Jan 2015 19:44:56 +0000 (14:44 -0500)
diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml

index 8feb2fbf0ad251833380c8da7fd62c876ef330e8..6282ab885397683428197d526bbb226093184c9e 100644 (file)
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -69,12 +69,14 @@
    regardless of the database encoding, and are checked only for syntactic
    correctness (that is, that four hex digits follow <literal>\u</>).
    However, the input function for <type>jsonb</> is stricter: it disallows
-  Unicode escapes for non-ASCII characters (those
-  above <literal>U+007F</>) unless the database encoding is UTF8.  It also
-  insists that any use of Unicode surrogate pairs to designate characters
-  outside the Unicode Basic Multilingual Plane be correct.  Valid Unicode
-  escapes, except for <literal>\u0000</>, are then converted to the
-  equivalent ASCII or UTF8 character for storage.
+  Unicode escapes for non-ASCII characters (those above <literal>U+007F</>)
+  unless the database encoding is UTF8.  The <type>jsonb</> type also
+  rejects <literal>\u0000</> (because that cannot be represented in
+  <productname>PostgreSQL</productname>'s <type>text</> type), and it insists
+  that any use of Unicode surrogate pairs to designate characters outside
+  the Unicode Basic Multilingual Plane be correct.  Valid Unicode escapes
+  are converted to the equivalent ASCII or UTF8 character for storage;
+  this includes folding surrogate pairs into a single character.
   </para>
  
   <note>
@@ -101,7 +103,7 @@
    constitutes valid <type>jsonb</type> data that do not apply to
    the <type>json</type> type, nor to JSON in the abstract, corresponding
    to limits on what can be represented by the underlying data type.
-  Specifically, <type>jsonb</> will reject numbers that are outside the
+  Notably, <type>jsonb</> will reject numbers that are outside the
    range of the <productname>PostgreSQL</productname> <type>numeric</> data
    type, while <type>json</> will not.  Such implementation-defined
    restrictions are permitted by <acronym>RFC</> 7159.  However, in
@@ -134,7 +136,8 @@
         <row>
          <entry><type>string</></entry>
          <entry><type>text</></entry>
-        <entry>See notes above concerning encoding restrictions</entry>
+        <entry><literal>\u0000</> is disallowed, as are non-ASCII Unicode
+         escapes if database encoding is not UTF8</entry>
         </row>
         <row>
          <entry><type>number</></entry>
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml

index 961e4617978e965ab065ee368c28e0efc23f15f4..11bbf3bf36ce6b28c777bfda293a8496864400e4 100644 (file)
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -101,22 +101,6 @@
       </para>
      </listitem>
  
-    <listitem>
-     <para>
-      Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link>
-      text values are no longer rendered with the backslash escaped
-      (Andrew Dunstan)
-     </para>
-
-     <para>
-      Previously, all backslashes in text values being formed into JSON
-      were escaped. Now a backslash followed by <literal>u</> and four
-      hexadecimal digits is not escaped, as this is a legal sequence in a
-      JSON string value, and escaping the backslash led to some perverse
-      results.
-     </para>
-    </listitem>
-
      <listitem>
       <para>
        When converting values of type <type>date</>, <type>timestamp</>
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c

index 3c137ead1d07cc526ef428c1aeacefbbc6913d11..951b6554007b2272d917e1e431c084130a7ed6d7 100644 (file)
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -806,14 +806,17 @@ json_lex_string(JsonLexContext *lex)
                                          * For UTF8, replace the escape sequence by the actual
                                          * utf8 character in lex->strval. Do this also for other
                                          * encodings if the escape designates an ASCII character,
-                                        * otherwise raise an error. We don't ever unescape a
-                                        * \u0000, since that would result in an impermissible nul
-                                        * byte.
+                                        * otherwise raise an error.
                                          */
  
                                         if (ch == 0)
                                         {
-                                               appendStringInfoString(lex->strval, "\\u0000");
+                                               /* We can't allow this, since our TEXT type doesn't */
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                                                          errmsg("unsupported Unicode escape sequence"),
+                                                  errdetail("\\u0000 cannot be converted to text."),
+                                                                report_json_context(lex)));
                                         }
                                         else if (GetDatabaseEncoding() == PG_UTF8)
                                         {
@@ -833,8 +836,8 @@ json_lex_string(JsonLexContext *lex)
                                         else
                                         {
                                                 ereport(ERROR,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("invalid input syntax for type json"),
+                                                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                                                          errmsg("unsupported Unicode escape sequence"),
                                                                  errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
                                                                  report_json_context(lex)));
                                         }
@@ -1284,8 +1287,8 @@ json_categorize_type(Oid typoid,
  
         /*
          * We need to get the output function for everything except date and
-        * timestamp types, array and composite types, booleans,
-        * and non-builtin types  where there's a cast to json.
+        * timestamp types, array and composite types, booleans, and non-builtin
+        * types where there's a cast to json.
          */
  
         switch (typoid)
@@ -1335,11 +1338,12 @@ json_categorize_type(Oid typoid,
                                 /* but let's look for a cast to json, if it's not built-in */
                                 if (typoid >= FirstNormalObjectId)
                                 {
-                                       Oid castfunc;
+                                       Oid                     castfunc;
                                         CoercionPathType ctype;
  
                                         ctype = find_coercion_pathway(JSONOID, typoid,
-                                                                                                 COERCION_EXPLICIT, &castfunc);
+                                                                                                 COERCION_EXPLICIT,
+                                                                                                 &castfunc);
                                         if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
                                         {
                                                 *tcategory = JSONTYPE_CAST;
@@ -2382,30 +2386,7 @@ escape_json(StringInfo buf, const char *str)
                                 appendStringInfoString(buf, "\\\"");
                                 break;
                         case '\\':
-
-                               /*
-                                * Unicode escapes are passed through as is. There is no
-                                * requirement that they denote a valid character in the
-                                * server encoding - indeed that is a big part of their
-                                * usefulness.
-                                *
-                                * All we require is that they consist of \uXXXX where the Xs
-                                * are hexadecimal digits. It is the responsibility of the
-                                * caller of, say, to_json() to make sure that the unicode
-                                * escape is valid.
-                                *
-                                * In the case of a jsonb string value being escaped, the only
-                                * unicode escape that should be present is \u0000, all the
-                                * other unicode escapes will have been resolved.
-                                */
-                               if (p[1] == 'u' &&
-                                       isxdigit((unsigned char) p[2]) &&
-                                       isxdigit((unsigned char) p[3]) &&
-                                       isxdigit((unsigned char) p[4]) &&
-                                       isxdigit((unsigned char) p[5]))
-                                       appendStringInfoCharMacro(buf, *p);
-                               else
-                                       appendStringInfoString(buf, "\\\\");
+                               appendStringInfoString(buf, "\\\\");
                                 break;
                         default:
                                 if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out

index e435d3e16502b23e236007058a983aab2f200272..16704363dc62b9ccfedab1b124f574821f5c936b 100644 (file)
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json  
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json  
-----------
- "\\abcd"
-(1 row)
-
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1400,6 +1386,36 @@ ERROR:  invalid input syntax for type json
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  --handling of simple unicode escapes
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+            correct_in_utf8            
+---------------------------------------
+ { "a":  "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+         correct_everywhere          
+-------------------------------------
+ { "a":  "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+            not_an_escape             
+--------------------------------------
+ { "a":  "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+         not_unescaped          
+--------------------------------
+ { "a":  "null \u0000 escape" }
+(1 row)
+
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+          not_an_escape          
+---------------------------------
+ { "a":  "null \\u0000 escape" }
+(1 row)
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
     correct_in_utf8    
  ----------------------
@@ -1412,8 +1428,18 @@ select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   dollar $ character
  (1 row)
  
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
-   not_unescaped    
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out

index 106b481fab91bf82a6b9c339875e2c2884d42272..807814641dd897f6e47c3b5eb8141c21f3269714 100644 (file)
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json  
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json  
-----------
- "\\abcd"
-(1 row)
-
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
  
  -- handling of unicode surrogate pairs
  select json '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
  select json '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
@@ -1398,8 +1384,38 @@ ERROR:  invalid input syntax for type json
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  --handling of simple unicode escapes
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+            correct_in_utf8            
+---------------------------------------
+ { "a":  "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+         correct_everywhere          
+-------------------------------------
+ { "a":  "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+            not_an_escape             
+--------------------------------------
+ { "a":  "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+         not_unescaped          
+--------------------------------
+ { "a":  "null \u0000 escape" }
+(1 row)
+
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+          not_an_escape          
+---------------------------------
+ { "a":  "null \\u0000 escape" }
+(1 row)
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
  select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
@@ -1408,8 +1424,18 @@ select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   dollar $ character
  (1 row)
  
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
-   not_unescaped    
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out

index aa5686ffb69dbeda3a355f13cfcd1ed1fd296265..6c6ed950f0830c8323d48134618e57ed0c0fc9de 100644 (file)
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb;
                 ^
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb;              -- OK, legal escape
-  jsonb   
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb;              -- OK, legal escape
+ jsonb 
+-------
+ "E"
  (1 row)
  
+SELECT '"\u0000"'::jsonb;              -- ERROR, we don't support U+0000
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+               ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: ...
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb 
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb 
-----------
- "\\abcd"
-(1 row)
-
  --jsonb_agg
  CREATE TEMP TABLE rows AS
  SELECT x, 'txt' || x as y
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+        correct_in_utf8        
+-------------------------------
+ {"a": "the Copyright © sign"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+     correct_everywhere      
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+           not_an_escape           
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+        not_an_escape         
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
     correct_in_utf8    
  ----------------------
   the Copyright © sign
  (1 row)
  
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   correct_everywhere 
  --------------------
   dollar $ character
  (1 row)
  
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
-   not_unescaped    
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out

index 687ae63b7072ad2fe69b5fd912e868a12577f96f..f30148d51c1bdc232266ca5a6998237b67f39de0 100644 (file)
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb;
                 ^
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb;              -- OK, legal escape
-  jsonb   
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb;              -- OK, legal escape
+ jsonb 
+-------
+ "E"
  (1 row)
  
+SELECT '"\u0000"'::jsonb;              -- ERROR, we don't support U+0000
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+               ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: ...
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
                              ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb 
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb 
-----------
- "\\abcd"
-(1 row)
-
  --jsonb_agg
  CREATE TEMP TABLE rows AS
  SELECT x, 'txt' || x as y
@@ -1941,7 +1933,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200
  
  -- handling of unicode surrogate pairs
  SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc3...
                                     ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-ERROR:  invalid input syntax for type json
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as corr...
+                     ^
+DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+     correct_everywhere      
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+           not_an_escape           
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+        not_an_escape         
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a'...
                       ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   correct_everywhere 
  --------------------
   dollar $ character
  (1 row)
  
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
-   not_unescaped    
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql

index 36a6674ff91092e0dc525edfdff516102eaa5c9e..53a37a88439171127c220470f5319dcdc172239d 100644 (file)
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8;
  select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  COMMIT;
  
--- unicode escape - backslash is not escaped
-
-select to_json(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_json(text '\abcd');
-
  --json_agg
  
  SELECT json_agg(q)
@@ -401,9 +393,17 @@ select json '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
  
  --handling of simple unicode escapes
  
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  
  --json_typeof() function
  select value, json_typeof(value)
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql

index a846103933092dd51c4f49a962ba03775a07dcf5..53cc2393c626c01d8303a3b9bb63293a1b50593b 100644 (file)
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb;                 -- ERROR, not a valid JSON escape
  SELECT '"\u"'::jsonb;                  -- ERROR, incomplete escape
  SELECT '"\u00"'::jsonb;                        -- ERROR, incomplete escape
  SELECT '"\u000g"'::jsonb;              -- ERROR, g is not a hex digit
-SELECT '"\u0000"'::jsonb;              -- OK, legal escape
+SELECT '"\u0045"'::jsonb;              -- OK, legal escape
+SELECT '"\u0000"'::jsonb;              -- ERROR, we don't support U+0000
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -73,14 +74,6 @@ SET LOCAL TIME ZONE -8;
  select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
  COMMIT;
  
--- unicode escape - backslash is not escaped
-
-select to_jsonb(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_jsonb(text '\abcd');
-
  --jsonb_agg
  
  CREATE TEMP TABLE rows AS
@@ -488,9 +481,18 @@ SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
  SELECT jsonb '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
  
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  
  -- jsonb_to_record and jsonb_to_recordset
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 30 Jan 2015 19:44:46 +0000 (14:44 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 30 Jan 2015 19:44:56 +0000 (14:44 -0500)
doc/src/sgml/json.sgml		patch | blob | history
doc/src/sgml/release-9.4.sgml		patch | blob | history
src/backend/utils/adt/json.c		patch | blob | history
src/test/regress/expected/json.out		patch | blob | history
src/test/regress/expected/json_1.out		patch | blob | history
src/test/regress/expected/jsonb.out		patch | blob | history
src/test/regress/expected/jsonb_1.out		patch | blob | history
src/test/regress/sql/json.sql		patch | blob | history
src/test/regress/sql/jsonb.sql		patch | blob | history