Commit 636a939f authored by Tom Lane

Fix array_out's failure to backslash backslashes, per bug #524. Also,
remove brain-dead rule that double quotes are needed if and only if the
datatype is pass-by-reference; neither direction of the implication holds
water.  Instead, examine the actual data string to see if it contains
any characters that force us to quote it.
Add some documentation about quoting of array values, which was previously
explained nowhere AFAICT.
parent a4e8cd30
<!-- $Header: /cvsroot/pgsql/doc/src/sgml/array.sgml,v 1.17 2001/11/28 20:49:09 petere Exp $ --> <!-- $Header: /cvsroot/pgsql/doc/src/sgml/array.sgml,v 1.18 2001/11/29 21:02:41 tgl Exp $ -->
<chapter id="arrays"> <chapter id="arrays">
<title>Arrays</title> <title>Arrays</title>
...@@ -248,4 +248,36 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000; ...@@ -248,4 +248,36 @@ SELECT * FROM sal_emp WHERE pay_by_quarter **= 10000;
</para> </para>
</note> </note>
<formalpara>
<title>Quoting array elements.</title>
<para>
As shown above, when writing an array literal value you may write double
quotes around any individual array
element. You <emphasis>must</> do so if the element value would otherwise
confuse the array-value parser. For example, elements containing curly
braces, commas, double quotes, backslashes, or white space must be
double-quoted. To put a double quote or backslash in an array element
value, precede it with a backslash.
</para>
</formalpara>
<tip>
<para>
Remember that what you write in an SQL query will first be interpreted
as a string literal, and then as an array. This doubles the number of
backslashes you need. For example, to insert a <type>text</> array
value containing a backslash and a double quote, you'd need to write
<programlisting>
INSERT ... VALUES ('{"\\\\","\\""}');
</programlisting>
The string-literal processor removes one level of backslashes, so that
what arrives at the array-value parser looks like <literal>{"\\","\""}</>.
In turn, the strings fed to the <type>text</> datatype's input routine
become <literal>\</> and <literal>"</> respectively. (If we were working
with a datatype whose input routine also treated backslashes specially,
<type>bytea</> for example, we might need as many as eight backslashes
in the query to get one backslash into the stored array element.)
</para>
</tip>
</chapter> </chapter>
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.71 2001/10/25 05:49:43 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.72 2001/11/29 21:02:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -346,6 +346,7 @@ ArrayCount(char *str, int *dim, int typdelim) ...@@ -346,6 +346,7 @@ ArrayCount(char *str, int *dim, int typdelim)
* If element type is pass-by-ref, the Datums point to palloc'd values. * If element type is pass-by-ref, the Datums point to palloc'd values.
* *nbytes is set to the amount of data space needed for the array, * *nbytes is set to the amount of data space needed for the array,
* including alignment padding but not including array header overhead. * including alignment padding but not including array header overhead.
* CAUTION: the contents of "arrayStr" may be modified!
*--------------------------------------------------------------------------- *---------------------------------------------------------------------------
*/ */
static Datum * static Datum *
...@@ -564,16 +565,13 @@ array_out(PG_FUNCTION_ARGS) ...@@ -564,16 +565,13 @@ array_out(PG_FUNCTION_ARGS)
char *p, char *p,
*tmp, *tmp,
*retval, *retval,
**values, **values;
delim[2]; bool *needquotes;
int nitems, int nitems,
overall_length, overall_length,
i, i,
j, j,
k, k,
#ifndef TCL_ARRAYS
l,
#endif
indx[MAXDIM]; indx[MAXDIM];
int ndim, int ndim,
*dim; *dim;
...@@ -581,26 +579,29 @@ array_out(PG_FUNCTION_ARGS) ...@@ -581,26 +579,29 @@ array_out(PG_FUNCTION_ARGS)
system_cache_lookup(element_type, false, &typlen, &typbyval, system_cache_lookup(element_type, false, &typlen, &typbyval,
&typdelim, &typelem, &typoutput, &typalign); &typdelim, &typelem, &typoutput, &typalign);
fmgr_info(typoutput, &outputproc); fmgr_info(typoutput, &outputproc);
sprintf(delim, "%c", typdelim);
ndim = ARR_NDIM(v); ndim = ARR_NDIM(v);
dim = ARR_DIMS(v); dim = ARR_DIMS(v);
nitems = ArrayGetNItems(ndim, dim); nitems = ArrayGetNItems(ndim, dim);
if (nitems == 0) if (nitems == 0)
{ {
retval = (char *) palloc(3); retval = pstrdup("{}");
retval[0] = '{';
retval[1] = '}';
retval[2] = '\0';
PG_RETURN_CSTRING(retval); PG_RETURN_CSTRING(retval);
} }
/*
* Convert all values to string form, count total space needed
* (including any overhead such as escaping backslashes),
* and detect whether each item needs double quotes.
*/
values = (char **) palloc(nitems * sizeof(char *));
needquotes = (bool *) palloc(nitems * sizeof(bool));
p = ARR_DATA_PTR(v); p = ARR_DATA_PTR(v);
overall_length = 1; /* [TRH] don't forget to count \0 at end. */ overall_length = 1; /* [TRH] don't forget to count \0 at end. */
values = (char **) palloc(nitems * sizeof(char *));
for (i = 0; i < nitems; i++) for (i = 0; i < nitems; i++)
{ {
Datum itemvalue; Datum itemvalue;
bool nq;
itemvalue = fetch_att(p, typbyval, typlen); itemvalue = fetch_att(p, typbyval, typlen);
values[i] = DatumGetCString(FunctionCall3(&outputproc, values[i] = DatumGetCString(FunctionCall3(&outputproc,
...@@ -612,20 +613,32 @@ array_out(PG_FUNCTION_ARGS) ...@@ -612,20 +613,32 @@ array_out(PG_FUNCTION_ARGS)
else else
p += INTALIGN(*(int32 *) p); p += INTALIGN(*(int32 *) p);
/* /* count data plus backslashes; detect chars needing quotes */
* For the pair of double quotes nq = (values[i][0] == '\0'); /* force quotes for empty string */
*/
if (!typbyval)
overall_length += 2;
for (tmp = values[i]; *tmp; tmp++) for (tmp = values[i]; *tmp; tmp++)
{ {
char ch = *tmp;
overall_length += 1; overall_length += 1;
if (ch == '"' || ch == '\\')
{
nq = true;
#ifndef TCL_ARRAYS #ifndef TCL_ARRAYS
if (*tmp == '"')
overall_length += 1; overall_length += 1;
#endif #endif
} }
else if (ch == '{' || ch == '}' || ch == typdelim ||
isspace((unsigned char) ch))
nq = true;
}
needquotes[i] = nq;
/* Count the pair of double quotes, if needed */
if (nq)
overall_length += 2;
/* and the comma */
overall_length += 1; overall_length += 1;
} }
...@@ -634,41 +647,41 @@ array_out(PG_FUNCTION_ARGS) ...@@ -634,41 +647,41 @@ array_out(PG_FUNCTION_ARGS)
*/ */
for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k); for (i = j = 0, k = 1; i < ndim; k *= dim[i++], j += k);
p = (char *) palloc(overall_length + 2 * j); retval = (char *) palloc(overall_length + 2 * j);
retval = p; p = retval;
#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p))
#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0')
strcpy(p, "{"); APPENDCHAR('{');
for (i = 0; i < ndim; indx[i++] = 0); for (i = 0; i < ndim; indx[i++] = 0);
j = 0; j = 0;
k = 0; k = 0;
do do
{ {
for (i = j; i < ndim - 1; i++) for (i = j; i < ndim - 1; i++)
strcat(p, "{"); APPENDCHAR('{');
/* if (needquotes[k])
* Surround anything that is not passed by value in double quotes.
* See above for more details.
*/
if (!typbyval)
{ {
strcat(p, "\""); APPENDCHAR('"');
#ifndef TCL_ARRAYS #ifndef TCL_ARRAYS
l = strlen(p);
for (tmp = values[k]; *tmp; tmp++) for (tmp = values[k]; *tmp; tmp++)
{ {
if (*tmp == '"') char ch = *tmp;
p[l++] = '\\';
p[l++] = *tmp; if (ch == '"' || ch == '\\')
*p++ = '\\';
*p++ = ch;
} }
p[l] = '\0'; *p = '\0';
#else #else
strcat(p, values[k]); APPENDSTR(values[k]);
#endif #endif
strcat(p, "\""); APPENDCHAR('"');
} }
else else
strcat(p, values[k]); APPENDSTR(values[k]);
pfree(values[k++]); pfree(values[k++]);
for (i = ndim - 1; i >= 0; i--) for (i = ndim - 1; i >= 0; i--)
...@@ -676,16 +689,21 @@ array_out(PG_FUNCTION_ARGS) ...@@ -676,16 +689,21 @@ array_out(PG_FUNCTION_ARGS)
indx[i] = (indx[i] + 1) % dim[i]; indx[i] = (indx[i] + 1) % dim[i];
if (indx[i]) if (indx[i])
{ {
strcat(p, delim); APPENDCHAR(typdelim);
break; break;
} }
else else
strcat(p, "}"); APPENDCHAR('}');
} }
j = i; j = i;
} while (j != -1); } while (j != -1);
#undef APPENDSTR
#undef APPENDCHAR
pfree(values); pfree(values);
pfree(needquotes);
PG_RETURN_CSTRING(retval); PG_RETURN_CSTRING(retval);
} }
......
...@@ -29,10 +29,10 @@ INSERT INTO arrtest (a, b[1][2][2], c, d[2][1]) ...@@ -29,10 +29,10 @@ INSERT INTO arrtest (a, b[1][2][2], c, d[2][1])
VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}'); VALUES ('{}', '{3,4}', '{foo,bar}', '{bar,foo}');
SELECT * FROM arrtest; SELECT * FROM arrtest;
a | b | c | d | e | f | g a | b | c | d | e | f | g
-------------+-----------------+---------------+-------------------+---------------+-------------------+----------------- -------------+-----------------+-----------+---------------+-----------+-----------------+-------------
{1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {} {1,2,3,4,5} | {{{0,0},{1,2}}} | {} | {} | | {} | {}
{11,12,23} | {{3,4},{4,5}} | {"foobar"} | {{"elt1","elt2"}} | {"3.4","6.7"} | {"abc ","abcde"} | {"abc","abcde"} {11,12,23} | {{3,4},{4,5}} | {foobar} | {{elt1,elt2}} | {3.4,6.7} | {"abc ",abcde} | {abc,abcde}
{} | {3,4} | {"foo","bar"} | {"bar","foo"} | | | {} | {3,4} | {foo,bar} | {bar,foo} | | |
(3 rows) (3 rows)
SELECT arrtest.a[1], SELECT arrtest.a[1],
...@@ -63,10 +63,10 @@ SELECT a[1:3], ...@@ -63,10 +63,10 @@ SELECT a[1:3],
d[1:1][1:2] d[1:1][1:2]
FROM arrtest; FROM arrtest;
a | b | c | d a | b | c | d
------------+-----------------+---------------+------------------- ------------+-----------------+-----------+---------------
{1,2,3} | {{{0,0},{1,2}}} | | {1,2,3} | {{{0,0},{1,2}}} | |
{11,12,23} | | {"foobar"} | {{"elt1","elt2"}} {11,12,23} | | {foobar} | {{elt1,elt2}}
| | {"foo","bar"} | | | {foo,bar} |
(3 rows) (3 rows)
SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c SELECT array_dims(a) AS a,array_dims(b) AS b,array_dims(c) AS c
...@@ -99,10 +99,10 @@ UPDATE arrtest ...@@ -99,10 +99,10 @@ UPDATE arrtest
WHERE array_dims(c) is not null; WHERE array_dims(c) is not null;
SELECT a,b,c FROM arrtest; SELECT a,b,c FROM arrtest;
a | b | c a | b | c
---------------+-----------------------+----------------------- ---------------+-----------------------+-------------------
{16,25,3,4,5} | {{{113,142},{1,147}}} | {} {16,25,3,4,5} | {{{113,142},{1,147}}} | {}
{} | {3,4} | {"foo","new_word"} {} | {3,4} | {foo,new_word}
{16,25,23} | {{3,4},{4,5}} | {"foobar","new_word"} {16,25,23} | {{3,4},{4,5}} | {foobar,new_word}
(3 rows) (3 rows)
SELECT a[1:3], SELECT a[1:3],
...@@ -111,9 +111,9 @@ SELECT a[1:3], ...@@ -111,9 +111,9 @@ SELECT a[1:3],
d[1:1][2:2] d[1:1][2:2]
FROM arrtest; FROM arrtest;
a | b | c | d a | b | c | d
------------+-----------------------+-----------------------+------------ ------------+-----------------------+-------------------+----------
{16,25,3} | {{{113,142},{1,147}}} | | {16,25,3} | {{{113,142},{1,147}}} | |
| | {"foo","new_word"} | | | {foo,new_word} |
{16,25,23} | | {"foobar","new_word"} | {{"elt2"}} {16,25,23} | | {foobar,new_word} | {{elt2}}
(3 rows) (3 rows)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment