Fix jsonb Unicode escape processing, and in consequence disallow \u0000.

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 30 Jan 2015 19:44:49 +0000 (14:44 -0500)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 30 Jan 2015 19:44:49 +0000 (14:44 -0500)
diff --git a/doc/src/sgml/json.sgml b/doc/src/sgml/json.sgml

index 8feb2fbf0ad251833380c8da7fd62c876ef330e8..6282ab885397683428197d526bbb226093184c9e 100644 (file)
--- a/doc/src/sgml/json.sgml
+++ b/doc/src/sgml/json.sgml
@@ -69,12 +69,14 @@
    regardless of the database encoding, and are checked only for syntactic
    correctness (that is, that four hex digits follow <literal>\u</>).
    However, the input function for <type>jsonb</> is stricter: it disallows
-  Unicode escapes for non-ASCII characters (those
-  above <literal>U+007F</>) unless the database encoding is UTF8.  It also
-  insists that any use of Unicode surrogate pairs to designate characters
-  outside the Unicode Basic Multilingual Plane be correct.  Valid Unicode
-  escapes, except for <literal>\u0000</>, are then converted to the
-  equivalent ASCII or UTF8 character for storage.
+  Unicode escapes for non-ASCII characters (those above <literal>U+007F</>)
+  unless the database encoding is UTF8.  The <type>jsonb</> type also
+  rejects <literal>\u0000</> (because that cannot be represented in
+  <productname>PostgreSQL</productname>'s <type>text</> type), and it insists
+  that any use of Unicode surrogate pairs to designate characters outside
+  the Unicode Basic Multilingual Plane be correct.  Valid Unicode escapes
+  are converted to the equivalent ASCII or UTF8 character for storage;
+  this includes folding surrogate pairs into a single character.
   </para>
  
   <note>
@@ -101,7 +103,7 @@
    constitutes valid <type>jsonb</type> data that do not apply to
    the <type>json</type> type, nor to JSON in the abstract, corresponding
    to limits on what can be represented by the underlying data type.
-  Specifically, <type>jsonb</> will reject numbers that are outside the
+  Notably, <type>jsonb</> will reject numbers that are outside the
    range of the <productname>PostgreSQL</productname> <type>numeric</> data
    type, while <type>json</> will not.  Such implementation-defined
    restrictions are permitted by <acronym>RFC</> 7159.  However, in
@@ -134,7 +136,8 @@
         <row>
          <entry><type>string</></entry>
          <entry><type>text</></entry>
-        <entry>See notes above concerning encoding restrictions</entry>
+        <entry><literal>\u0000</> is disallowed, as are non-ASCII Unicode
+         escapes if database encoding is not UTF8</entry>
         </row>
         <row>
          <entry><type>number</></entry>
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml

index 961e4617978e965ab065ee368c28e0efc23f15f4..11bbf3bf36ce6b28c777bfda293a8496864400e4 100644 (file)
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -101,22 +101,6 @@
       </para>
      </listitem>
  
-    <listitem>
-     <para>
-      Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link>
-      text values are no longer rendered with the backslash escaped
-      (Andrew Dunstan)
-     </para>
-
-     <para>
-      Previously, all backslashes in text values being formed into JSON
-      were escaped. Now a backslash followed by <literal>u</> and four
-      hexadecimal digits is not escaped, as this is a legal sequence in a
-      JSON string value, and escaping the backslash led to some perverse
-      results.
-     </para>
-    </listitem>
-
      <listitem>
       <para>
        When converting values of type <type>date</>, <type>timestamp</>
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c

index 1d6b752a28b6ad40d5cf088c14ad3e481dcde716..48f03e0b36ae594d6edd46b76dca71aea5969e66 100644 (file)
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -807,14 +807,17 @@ json_lex_string(JsonLexContext *lex)
                                          * For UTF8, replace the escape sequence by the actual
                                          * utf8 character in lex->strval. Do this also for other
                                          * encodings if the escape designates an ASCII character,
-                                        * otherwise raise an error. We don't ever unescape a
-                                        * \u0000, since that would result in an impermissible nul
-                                        * byte.
+                                        * otherwise raise an error.
                                          */
  
                                         if (ch == 0)
                                         {
-                                               appendStringInfoString(lex->strval, "\\u0000");
+                                               /* We can't allow this, since our TEXT type doesn't */
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                                                          errmsg("unsupported Unicode escape sequence"),
+                                                  errdetail("\\u0000 cannot be converted to text."),
+                                                                report_json_context(lex)));
                                         }
                                         else if (GetDatabaseEncoding() == PG_UTF8)
                                         {
@@ -834,8 +837,8 @@ json_lex_string(JsonLexContext *lex)
                                         else
                                         {
                                                 ereport(ERROR,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("invalid input syntax for type json"),
+                                                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                                                          errmsg("unsupported Unicode escape sequence"),
                                                                  errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
                                                                  report_json_context(lex)));
                                         }
@@ -2374,30 +2377,7 @@ escape_json(StringInfo buf, const char *str)
                                 appendStringInfoString(buf, "\\\"");
                                 break;
                         case '\\':
-
-                               /*
-                                * Unicode escapes are passed through as is. There is no
-                                * requirement that they denote a valid character in the
-                                * server encoding - indeed that is a big part of their
-                                * usefulness.
-                                *
-                                * All we require is that they consist of \uXXXX where the Xs
-                                * are hexadecimal digits. It is the responsibility of the
-                                * caller of, say, to_json() to make sure that the unicode
-                                * escape is valid.
-                                *
-                                * In the case of a jsonb string value being escaped, the only
-                                * unicode escape that should be present is \u0000, all the
-                                * other unicode escapes will have been resolved.
-                                */
-                               if (p[1] == 'u' &&
-                                       isxdigit((unsigned char) p[2]) &&
-                                       isxdigit((unsigned char) p[3]) &&
-                                       isxdigit((unsigned char) p[4]) &&
-                                       isxdigit((unsigned char) p[5]))
-                                       appendStringInfoCharMacro(buf, *p);
-                               else
-                                       appendStringInfoString(buf, "\\\\");
+                               appendStringInfoString(buf, "\\\\");
                                 break;
                         default:
                                 if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out

index bb4d9ed4bebca91f9c1e37b54fc1f8dbc17832f3..c916678427d067f94705e6da8c85b5fa720190b5 100644 (file)
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json  
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json  
-----------
- "\\abcd"
-(1 row)
-
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1400,6 +1386,36 @@ ERROR:  invalid input syntax for type json
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  --handling of simple unicode escapes
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+            correct_in_utf8            
+---------------------------------------
+ { "a":  "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+         correct_everywhere          
+-------------------------------------
+ { "a":  "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+            not_an_escape             
+--------------------------------------
+ { "a":  "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+         not_unescaped          
+--------------------------------
+ { "a":  "null \u0000 escape" }
+(1 row)
+
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+          not_an_escape          
+---------------------------------
+ { "a":  "null \\u0000 escape" }
+(1 row)
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
     correct_in_utf8    
  ----------------------
@@ -1412,8 +1428,18 @@ select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   dollar $ character
  (1 row)
  
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
-   not_unescaped    
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out

index 83c1d7d492c16dd35b1c51ca9fef4b3e9bbd967e..ce63bfb227e29ebc425599f795812c1f59b295ff 100644 (file)
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json  
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json  
-----------
- "\\abcd"
-(1 row)
-
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
  
  -- handling of unicode surrogate pairs
  select json '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
  select json '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
@@ -1398,8 +1384,38 @@ ERROR:  invalid input syntax for type json
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  --handling of simple unicode escapes
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+            correct_in_utf8            
+---------------------------------------
+ { "a":  "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+         correct_everywhere          
+-------------------------------------
+ { "a":  "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+            not_an_escape             
+--------------------------------------
+ { "a":  "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+         not_unescaped          
+--------------------------------
+ { "a":  "null \u0000 escape" }
+(1 row)
+
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+          not_an_escape          
+---------------------------------
+ { "a":  "null \\u0000 escape" }
+(1 row)
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
  select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
@@ -1408,8 +1424,18 @@ select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   dollar $ character
  (1 row)
  
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
-   not_unescaped    
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out

index 9146f59435b62f4a4862dc79791b1aa313c87804..523f50c5465c8a07b11b250259cf1b6902d90e87 100644 (file)
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb;
                 ^
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb;              -- OK, legal escape
-  jsonb   
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb;              -- OK, legal escape
+ jsonb 
+-------
+ "E"
  (1 row)
  
+SELECT '"\u0000"'::jsonb;              -- ERROR, we don't support U+0000
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+               ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: ...
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -1798,20 +1804,62 @@ LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+        correct_in_utf8        
+-------------------------------
+ {"a": "the Copyright © sign"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+     correct_everywhere      
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+           not_an_escape           
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+        not_an_escape         
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
     correct_in_utf8    
  ----------------------
   the Copyright © sign
  (1 row)
  
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   correct_everywhere 
  --------------------
   dollar $ character
  (1 row)
  
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
-   not_unescaped    
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out

index 83d61f8c7e0ddcf508abdba6ff895c98956aa56f..eee22b4883cbdf7d69daf83b95b6f829d7e73fb9 100644 (file)
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb;
                 ^
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb;              -- OK, legal escape
-  jsonb   
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb;              -- OK, legal escape
+ jsonb 
+-------
+ "E"
  (1 row)
  
+SELECT '"\u0000"'::jsonb;              -- ERROR, we don't support U+0000
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+               ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: ...
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
                              ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -1768,7 +1774,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200
  
  -- handling of unicode surrogate pairs
  SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR:  invalid input syntax for type json
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc3...
                                     ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -1798,20 +1804,62 @@ LINE 1: SELECT jsonb '{ "a":  "\ude04X" }' -> 'a';
  DETAIL:  Unicode low surrogate must follow a high surrogate.
  CONTEXT:  JSON data, line 1: { "a":...
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-ERROR:  invalid input syntax for type json
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as corr...
+                     ^
+DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+     correct_everywhere      
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+           not_an_escape           
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+        not_an_escape         
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+ERROR:  unsupported Unicode escape sequence
  LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a'...
                       ^
  DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
  CONTEXT:  JSON data, line 1: { "a":...
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
   correct_everywhere 
  --------------------
   dollar $ character
  (1 row)
  
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
-   not_unescaped    
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+      not_an_escape      
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+ERROR:  unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fai...
+                     ^
+DETAIL:  \u0000 cannot be converted to text.
+CONTEXT:  JSON data, line 1: { "a":...
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+   not_an_escape    
  --------------------
   null \u0000 escape
  (1 row)
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql

index c9801321e09069570e7e79db341ba7848c7af065..a4eaa1fbc0bf29b38f2f99b583012e992fca5ad4 100644 (file)
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8;
  select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  COMMIT;
  
--- unicode escape - backslash is not escaped
-
-select to_json(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_json(text '\abcd');
-
  --json_agg
  
  SELECT json_agg(q)
@@ -401,9 +393,17 @@ select json '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
  
  --handling of simple unicode escapes
  
+select json '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+select json '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+select json '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+select json '{ "a":  "null \u0000 escape" }' as not_unescaped;
+select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+
  select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
  select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
-select json '{ "a":  "null \u0000 escape" }' ->> 'a' as not_unescaped;
+select json '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+select json '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+select json '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  
  --json_typeof() function
  select value, json_typeof(value)
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql

index f1ed021be2d3f4d2724428e4432b26e3aca00a08..a866584873198d292d766a84981cdfc25968db83 100644 (file)
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb;                 -- ERROR, not a valid JSON escape
  SELECT '"\u"'::jsonb;                  -- ERROR, incomplete escape
  SELECT '"\u00"'::jsonb;                        -- ERROR, incomplete escape
  SELECT '"\u000g"'::jsonb;              -- ERROR, g is not a hex digit
-SELECT '"\u0000"'::jsonb;              -- OK, legal escape
+SELECT '"\u0045"'::jsonb;              -- OK, legal escape
+SELECT '"\u0000"'::jsonb;              -- ERROR, we don't support U+0000
  -- use octet_length here so we don't get an odd unicode char in the
  -- output
  SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -373,9 +374,18 @@ SELECT jsonb '{ "a":  "\ud83dX" }' -> 'a'; -- orphan high surrogate
  SELECT jsonb '{ "a":  "\ude04X" }' -> 'a'; -- orphan low surrogate
  
  -- handling of simple unicode escapes
-SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
-SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' AS not_unescaped;
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' as not_an_escape;
+SELECT jsonb '{ "a":  "null \u0000 escape" }' as fails;
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
+
+SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
+SELECT jsonb '{ "a":  "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+SELECT jsonb '{ "a":  "null \u0000 escape" }' ->> 'a' as fails;
+SELECT jsonb '{ "a":  "null \\u0000 escape" }' ->> 'a' as not_an_escape;
  
  -- jsonb_to_record and jsonb_to_recordset
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 30 Jan 2015 19:44:49 +0000 (14:44 -0500)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 30 Jan 2015 19:44:49 +0000 (14:44 -0500)
doc/src/sgml/json.sgml		patch | blob | history
doc/src/sgml/release-9.4.sgml		patch | blob | history
src/backend/utils/adt/json.c		patch | blob | history
src/test/regress/expected/json.out		patch | blob | history
src/test/regress/expected/json_1.out		patch | blob | history
src/test/regress/expected/jsonb.out		patch | blob | history
src/test/regress/expected/jsonb_1.out		patch | blob | history
src/test/regress/sql/json.sql		patch | blob | history
src/test/regress/sql/jsonb.sql		patch | blob | history