Do not escape a unicode sequence when escaping JSON text.

author Andrew Dunstan <andrew@dunslane.net>

Tue, 3 Jun 2014 20:11:31 +0000 (16:11 -0400)

committer Andrew Dunstan <andrew@dunslane.net>

Tue, 3 Jun 2014 20:11:31 +0000 (16:11 -0400)
author Andrew Dunstan <andrew@dunslane.net>
Tue, 3 Jun 2014 20:11:31 +0000 (16:11 -0400)
committer Andrew Dunstan <andrew@dunslane.net>
Tue, 3 Jun 2014 20:11:31 +0000 (16:11 -0400)
diff --git a/doc/src/sgml/release-9.4.sgml b/doc/src/sgml/release-9.4.sgml

index 08307c8f667c72f1bce7afd716ceeb830d697f7e..3f30c636ba2f091bcd4c8a4535cf70a2f9645505 100644 (file)
--- a/doc/src/sgml/release-9.4.sgml
+++ b/doc/src/sgml/release-9.4.sgml
@@ -180,6 +180,21 @@
       </para>
      </listitem>
  
+    <listitem>
+     <para>
+      Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link>
+      text values are no longer rendered with the backslash escaped.
+      (Andrew Dunstan)
+     </para>
+
+     <para>
+      Previously, all backslashes in text values being formed into JSON were
+      escaped. Now a backslash followed by "u" and four hexadecimal digits is
+      not escaped, as this is a legal sequence in a JSON string value, and
+      escaping the backslash led to some perverse results.
+     </para>
+    </listitem>
+
      <listitem>
       <para>
        Rename <link linkend="SQL-EXPLAIN"><command>EXPLAIN
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c

index 2462111ecb3050cd0836677dda85e24fbd747860..8ca1ede83fb355aefcfa472fcd181168fcfa3a0a 100644 (file)
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -2315,7 +2315,26 @@ escape_json(StringInfo buf, const char *str)
                                 appendStringInfoString(buf, "\\\"");
                                 break;
                         case '\\':
-                               appendStringInfoString(buf, "\\\\");
+                               /*
+                                * Unicode escapes are passed through as is. There is no
+                                * requirement that they denote a valid character in the
+                                * server encoding - indeed that is a big part of their
+                                * usefulness.
+                                *
+                                * All we require is that they consist of \uXXXX where
+                                * the Xs are hexadecimal digits. It is the responsibility
+                                * of the caller of, say, to_json() to make sure that the
+                                * unicode escape is valid.
+                                *
+                                * In the case of a jsonb string value being escaped, the
+                                * only unicode escape that should be present is \u0000;
+                                * all the other unicode escapes will have been resolved.
+                                */
+                               if (p[1] == 'u' && isxdigit(p[2]) && isxdigit(p[3])
+                                       && isxdigit(p[4]) && isxdigit(p[5]))
+                                       appendStringInfoCharMacro(buf, *p);
+                               else
+                                       appendStringInfoString(buf, "\\\\");
                                 break;
                         default:
                                 if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out

index c4dc8b0e3cb73f07ece801a7e8893ddf3b730b76..43341aa9bb5b62f5f20821c80acdce91f4cc2970 100644 (file)
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
+-- unicode escape - backslash is not escaped
+select to_json(text '\uabcd');
+ to_json  
+----------
+ "\uabcd"
+(1 row)
+
+-- any other backslash is escaped
+select to_json(text '\abcd');
+ to_json  
+----------
+ "\\abcd"
+(1 row)
+
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out

index 629e98e6c5f26952af1d01f2259ba7a2c160b473..953324637d8736f6a8a98fad148b40c6062bd44b 100644 (file)
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  (1 row)
  
  COMMIT;
+-- unicode escape - backslash is not escaped
+select to_json(text '\uabcd');
+ to_json  
+----------
+ "\uabcd"
+(1 row)
+
+-- any other backslash is escaped
+select to_json(text '\abcd');
+ to_json  
+----------
+ "\\abcd"
+(1 row)
+
  --json_agg
  SELECT json_agg(q)
    FROM ( SELECT $$a$$ || x AS b, y AS c,
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out

index ae7c5068119e7054b5febe7b75ac3992dd244019..1e46939b6fe9c28a169d0a234c33ada46fdb8ccc 100644 (file)
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -61,9 +61,9 @@ LINE 1: SELECT '"\u000g"'::jsonb;
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
  SELECT '"\u0000"'::jsonb;              -- OK, legal escape
-   jsonb   
------------
- "\\u0000"
+  jsonb   
+----------
+ "\u0000"
  (1 row)
  
  -- use octet_length here so we don't get an odd unicode char in the
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out

index 38a95b43f8c17ed536819abb1745fc667a177463..955dc424dce1b42237d7549b6b079b2a49133d8d 100644 (file)
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -61,9 +61,9 @@ LINE 1: SELECT '"\u000g"'::jsonb;
  DETAIL:  "\u" must be followed by four hexadecimal digits.
  CONTEXT:  JSON data, line 1: "\u000g...
  SELECT '"\u0000"'::jsonb;              -- OK, legal escape
-   jsonb   
------------
- "\\u0000"
+  jsonb   
+----------
+ "\u0000"
  (1 row)
  
  -- use octet_length here so we don't get an odd unicode char in the
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql

index 6c2faeccd30d2c64114709d659ca7454f62ffd80..3d5ed50126e83bd28defb25f0748cafc26a1ff3b 100644 (file)
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,6 +111,14 @@ SET LOCAL TIME ZONE -8;
  select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
  COMMIT;
  
+-- unicode escape - backslash is not escaped
+
+select to_json(text '\uabcd');
+
+-- any other backslash is escaped
+
+select to_json(text '\abcd');
+
  --json_agg
  
  SELECT json_agg(q)
author	Andrew Dunstan <andrew@dunslane.net>
	Tue, 3 Jun 2014 20:11:31 +0000 (16:11 -0400)
committer	Andrew Dunstan <andrew@dunslane.net>
	Tue, 3 Jun 2014 20:11:31 +0000 (16:11 -0400)
doc/src/sgml/release-9.4.sgml		patch \| blob \| history
src/backend/utils/adt/json.c		patch \| blob \| history
src/test/regress/expected/json.out		patch \| blob \| history
src/test/regress/expected/json_1.out		patch \| blob \| history
src/test/regress/expected/jsonb.out		patch \| blob \| history
src/test/regress/expected/jsonb_1.out		patch \| blob \| history
src/test/regress/sql/json.sql		patch \| blob \| history