Commit 0ad1a816 authored by Andrew Dunstan

Do not escape a unicode sequence when escaping JSON text.

Previously, any backslash in text being escaped for JSON was doubled so
that the result was still valid JSON. However, this led to some perverse
results in the case of Unicode sequences. These are now detected and the
initial backslash is no longer escaped. All other backslashes are
still escaped. No validity check is performed; all that is looked for is
\uXXXX where X is a hexadecimal digit.

This is a change from the 9.2 and 9.3 behaviour as noted in the Release
notes.

Per complaint from Teodor Sigaev.
parent f30015b6
...@@ -180,6 +180,21 @@ ...@@ -180,6 +180,21 @@
</para> </para>
</listitem> </listitem>
<listitem>
<para>
Unicode escapes in <link linkend="datatype-json"><type>JSON</type></link>
text values are no longer rendered with the backslash escaped.
(Andrew Dunstan)
</para>
<para>
Previously all backslashes in text values being formed into JSON were
escaped. Now a backslash followed by "u" and four hexadecimal digits is
not escaped, as this is a legal sequence in a JSON string value, and
escaping the backslash led to some perverse results.
</para>
</listitem>
<listitem> <listitem>
<para> <para>
Rename <link linkend="SQL-EXPLAIN"><command>EXPLAIN Rename <link linkend="SQL-EXPLAIN"><command>EXPLAIN
......
...@@ -2315,6 +2315,25 @@ escape_json(StringInfo buf, const char *str) ...@@ -2315,6 +2315,25 @@ escape_json(StringInfo buf, const char *str)
appendStringInfoString(buf, "\\\""); appendStringInfoString(buf, "\\\"");
break; break;
case '\\': case '\\':
/*
* Unicode escapes are passed through as is. There is no
* requirement that they denote a valid character in the
* server encoding - indeed that is a big part of their
* usefulness.
*
* All we require is that they consist of \uXXXX where
* the Xs are hexadecimal digits. It is the responsibility
* of the caller of, say, to_json() to make sure that the
* unicode escape is valid.
*
* In the case of a jsonb string value being escaped, the
* only unicode escape that should be present is \u0000,
* all the other unicode escapes will have been resolved.
*/
if (p[1] == 'u' && isxdigit(p[2]) && isxdigit(p[3])
&& isxdigit(p[4]) && isxdigit(p[5]))
appendStringInfoCharMacro(buf, *p);
else
appendStringInfoString(buf, "\\\\"); appendStringInfoString(buf, "\\\\");
break; break;
default: default:
......
...@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); ...@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row) (1 row)
COMMIT; COMMIT;
-- unicode escape - backslash is not escaped
select to_json(text '\uabcd');
to_json
----------
"\uabcd"
(1 row)
-- any other backslash is escaped
select to_json(text '\abcd');
to_json
----------
"\\abcd"
(1 row)
--json_agg --json_agg
SELECT json_agg(q) SELECT json_agg(q)
FROM ( SELECT $$a$$ || x AS b, y AS c, FROM ( SELECT $$a$$ || x AS b, y AS c,
......
...@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); ...@@ -426,6 +426,20 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row) (1 row)
COMMIT; COMMIT;
-- unicode escape - backslash is not escaped
select to_json(text '\uabcd');
to_json
----------
"\uabcd"
(1 row)
-- any other backslash is escaped
select to_json(text '\abcd');
to_json
----------
"\\abcd"
(1 row)
--json_agg --json_agg
SELECT json_agg(q) SELECT json_agg(q)
FROM ( SELECT $$a$$ || x AS b, y AS c, FROM ( SELECT $$a$$ || x AS b, y AS c,
......
...@@ -62,8 +62,8 @@ DETAIL: "\u" must be followed by four hexadecimal digits. ...@@ -62,8 +62,8 @@ DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g... CONTEXT: JSON data, line 1: "\u000g...
SELECT '"\u0000"'::jsonb; -- OK, legal escape SELECT '"\u0000"'::jsonb; -- OK, legal escape
jsonb jsonb
----------- ----------
"\\u0000" "\u0000"
(1 row) (1 row)
-- use octet_length here so we don't get an odd unicode char in the -- use octet_length here so we don't get an odd unicode char in the
......
...@@ -62,8 +62,8 @@ DETAIL: "\u" must be followed by four hexadecimal digits. ...@@ -62,8 +62,8 @@ DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g... CONTEXT: JSON data, line 1: "\u000g...
SELECT '"\u0000"'::jsonb; -- OK, legal escape SELECT '"\u0000"'::jsonb; -- OK, legal escape
jsonb jsonb
----------- ----------
"\\u0000" "\u0000"
(1 row) (1 row)
-- use octet_length here so we don't get an odd unicode char in the -- use octet_length here so we don't get an odd unicode char in the
......
...@@ -111,6 +111,14 @@ SET LOCAL TIME ZONE -8; ...@@ -111,6 +111,14 @@ SET LOCAL TIME ZONE -8;
select to_json(timestamptz '2014-05-28 12:22:35.614298-04'); select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
COMMIT; COMMIT;
-- unicode escape - backslash is not escaped
select to_json(text '\uabcd');
-- any other backslash is escaped
select to_json(text '\abcd');
--json_agg --json_agg
SELECT json_agg(q) SELECT json_agg(q)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment