Commit 95cacd13 authored by Tom Lane

Use a non-locale-dependent definition of isspace() in array_in/array_out.

array_in discards unquoted leading and trailing whitespace in array values,
while array_out is careful to quote array elements that contain whitespace.
This is problematic when the definition of "whitespace" varies between
locales: array_in could drop characters that were meant to be part of the
value.  To avoid that, lock down "whitespace" to mean only the traditional
six ASCII space characters.

This change also works around a bug in OS X and some older BSD systems, in
which isspace() could return true for character fragments in UTF8 locales.
(There may be other places in PG where that bug could cause problems, but
this is the only one complained of so far; see recent report from Steven
Schlansker.)

Back-patch to 9.0, but not further.  Given the lack of previous reports
of trouble, changing this behavior in stable branches seems to offer
more risk of breaking applications than reward of avoiding problems.
parent c5d6d5bc
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.165 2010/08/11 19:12:27 heikki Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/arrayfuncs.c,v 1.166 2010/08/21 16:55:51 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -50,6 +50,7 @@ typedef enum ...@@ -50,6 +50,7 @@ typedef enum
ARRAY_LEVEL_DELIMITED ARRAY_LEVEL_DELIMITED
} ArrayParseState; } ArrayParseState;
static bool array_isspace(char ch);
static int ArrayCount(const char *str, int *dim, char typdelim); static int ArrayCount(const char *str, int *dim, char typdelim);
static void ReadArrayStr(char *arrayStr, const char *origStr, static void ReadArrayStr(char *arrayStr, const char *origStr,
int nitems, int ndim, int *dim, int nitems, int ndim, int *dim,
...@@ -192,7 +193,7 @@ array_in(PG_FUNCTION_ARGS) ...@@ -192,7 +193,7 @@ array_in(PG_FUNCTION_ARGS)
* Note: we currently allow whitespace between, but not within, * Note: we currently allow whitespace between, but not within,
* dimension items. * dimension items.
*/ */
while (isspace((unsigned char) *p)) while (array_isspace(*p))
p++; p++;
if (*p != '[') if (*p != '[')
break; /* no more dimension items */ break; /* no more dimension items */
...@@ -265,7 +266,7 @@ array_in(PG_FUNCTION_ARGS) ...@@ -265,7 +266,7 @@ array_in(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("missing assignment operator"))); errmsg("missing assignment operator")));
p += strlen(ASSGN); p += strlen(ASSGN);
while (isspace((unsigned char) *p)) while (array_isspace(*p))
p++; p++;
/* /*
...@@ -350,6 +351,27 @@ array_in(PG_FUNCTION_ARGS) ...@@ -350,6 +351,27 @@ array_in(PG_FUNCTION_ARGS)
PG_RETURN_ARRAYTYPE_P(retval); PG_RETURN_ARRAYTYPE_P(retval);
} }
/*
 * array_isspace() --- locale-independent whitespace test
 *
 * Array parsing formerly relied on isspace(), which meant that the same
 * array text could be silently interpreted differently depending on the
 * locale setting.  This routine instead recognizes exactly the six
 * characters of the traditional ASCII definition of isspace(): space,
 * tab, newline, carriage return, vertical tab, and form feed.
 *
 * Returns true if ch is one of those characters, false otherwise.
 */
static bool
array_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
/* /*
* ArrayCount * ArrayCount
* Determines the dimensions for an array string. * Determines the dimensions for an array string.
...@@ -534,7 +556,7 @@ ArrayCount(const char *str, int *dim, char typdelim) ...@@ -534,7 +556,7 @@ ArrayCount(const char *str, int *dim, char typdelim)
itemdone = true; itemdone = true;
nelems[nest_level - 1]++; nelems[nest_level - 1]++;
} }
else if (!isspace((unsigned char) *ptr)) else if (!array_isspace(*ptr))
{ {
/* /*
* Other non-space characters must be after a * Other non-space characters must be after a
...@@ -563,7 +585,7 @@ ArrayCount(const char *str, int *dim, char typdelim) ...@@ -563,7 +585,7 @@ ArrayCount(const char *str, int *dim, char typdelim)
/* only whitespace is allowed after the closing brace */ /* only whitespace is allowed after the closing brace */
while (*ptr) while (*ptr)
{ {
if (!isspace((unsigned char) *ptr++)) if (!array_isspace(*ptr++))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("malformed array literal: \"%s\"", str))); errmsg("malformed array literal: \"%s\"", str)));
...@@ -756,7 +778,7 @@ ReadArrayStr(char *arrayStr, ...@@ -756,7 +778,7 @@ ReadArrayStr(char *arrayStr,
indx[ndim - 1]++; indx[ndim - 1]++;
srcptr++; srcptr++;
} }
else if (isspace((unsigned char) *srcptr)) else if (array_isspace(*srcptr))
{ {
/* /*
* If leading space, drop it immediately. Else, copy * If leading space, drop it immediately. Else, copy
...@@ -1044,7 +1066,7 @@ array_out(PG_FUNCTION_ARGS) ...@@ -1044,7 +1066,7 @@ array_out(PG_FUNCTION_ARGS)
overall_length += 1; overall_length += 1;
} }
else if (ch == '{' || ch == '}' || ch == typdelim || else if (ch == '{' || ch == '}' || ch == typdelim ||
isspace((unsigned char) ch)) array_isspace(ch))
needquote = true; needquote = true;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment