Commit d12e5bb7 authored by Tom Lane's avatar Tom Lane

Code and docs review for commit 3187d6de.

Fix up check for high-bit-set characters, which provoked "comparison is
always true due to limited range of data type" warnings on some compilers,
and was unlike the way we do it elsewhere anyway.  Fix omission of "$"
from the set of valid identifier continuation characters.  Get rid of
sanitize_text(), which was utterly inconsistent with any other error report
anywhere in the system, and wasn't even well designed on its own terms
(double-quoting the result string without escaping contained double quotes
doesn't seem very well thought out).  Fix up error messages, which didn't
follow the message style guidelines very well, and were overly specific in
situations where the actual mistake might not be what they said.  Improve
documentation.

(I started out just intending to fix the compiler warning, but the more
I looked at the patch the less I liked it.)
parent 499a5057
...@@ -1823,25 +1823,22 @@ ...@@ -1823,25 +1823,22 @@
<indexterm> <indexterm>
<primary>parse_ident</primary> <primary>parse_ident</primary>
</indexterm> </indexterm>
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>, <literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal> [, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
</entry> </entry>
<entry><type>text[]</type></entry> <entry><type>text[]</type></entry>
<entry>Split <parameter>qualified identifier</parameter> into array <entry>
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is Split <parameter>qualified_identifier</parameter> into an array of
false, extra characters after the identifier are ignored. This is useful identifiers, removing any quoting of individual identifiers. By
for parsing identifiers for objects like functions and arrays that may default, extra characters after the last identifier are considered an
have trailing characters. By default, extra characters after the last error; but if the second parameter is <literal>false</>, then such
identifier are considered an error, but if the second parameter is false, extra characters are ignored. (This behavior is useful for parsing
then the characters after the last identifier are ignored. Note that this names for objects like functions.) Note that this function does not
function does not truncate quoted identifiers. If you care about that truncate over-length identifiers. If you want truncation you can cast
you should cast the result of this function to name[]. Non-printable the result to <type>name[]</>.
characters (like 0 to 31) are always displayed as hexadecimal codes,
which can be different from PostgreSQL internal SQL identifiers
processing, when the original escaped value is displayed.
</entry> </entry>
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry> <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
<entry><literal>"SomeSchema,sometable"</literal></entry> <entry><literal>{SomeSchema,sometable}</literal></entry>
</row> </row>
<row> <row>
......
...@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) ...@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
/* /*
* This simple parser utility are compatible with lexer implementation, * Is character a valid identifier start?
* used only in parse_ident function * Must match scan.l's {ident_start} character class.
*/ */
static bool static bool
is_ident_start(unsigned char c) is_ident_start(unsigned char c)
{ {
/* Underscores and ASCII letters are OK */
if (c == '_') if (c == '_')
return true; return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true; return true;
/* Any high-bit-set character is OK (might be part of a multibyte char) */
if (c >= 0200 && c <= 0377) if (IS_HIGHBIT_SET(c))
return true; return true;
return false; return false;
} }
/*
* Is character a valid identifier continuation?
* Must match scan.l's {ident_cont} character class.
*/
static bool static bool
is_ident_cont(unsigned char c) is_ident_cont(unsigned char c)
{ {
if (c >= '0' && c <= '9') /* Can be digit or dollar sign ... */
if ((c >= '0' && c <= '9') || c == '$')
return true; return true;
/* ... or an identifier start character */
return is_ident_start(c); return is_ident_start(c);
} }
/* /*
* Sanitize SQL string for using in error message. * parse_ident - parse a SQL qualified identifier into separate identifiers.
*/
static char *
sanitize_text(text *t)
{
int len = VARSIZE_ANY_EXHDR(t);
const char *p = VARDATA_ANY(t);
StringInfo dstr;
dstr = makeStringInfo();
appendStringInfoChar(dstr, '"');
while (len--)
{
switch (*p)
{
case '\b':
appendStringInfoString(dstr, "\\b");
break;
case '\f':
appendStringInfoString(dstr, "\\f");
break;
case '\n':
appendStringInfoString(dstr, "\\n");
break;
case '\r':
appendStringInfoString(dstr, "\\r");
break;
case '\t':
appendStringInfoString(dstr, "\\t");
break;
case '\'':
appendStringInfoString(dstr, "''");
break;
case '\\':
appendStringInfoString(dstr, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
appendStringInfo(dstr, "\\u%04x", (int) *p);
else
appendStringInfoCharMacro(dstr, *p);
break;
}
p++;
}
appendStringInfoChar(dstr, '"');
return dstr->data;
}
/*
* parse_ident - parse SQL composed identifier to separate identifiers.
* When strict mode is active (second parameter), then any chars after * When strict mode is active (second parameter), then any chars after
* last identifiers are disallowed. * the last identifier are disallowed.
*/ */
Datum Datum
parse_ident(PG_FUNCTION_ARGS) parse_ident(PG_FUNCTION_ARGS)
{ {
text *qualname; text *qualname = PG_GETARG_TEXT_PP(0);
char *qualname_str; bool strict = PG_GETARG_BOOL(1);
bool strict; char *qualname_str = text_to_cstring(qualname);
ArrayBuildState *astate = NULL;
char *nextp; char *nextp;
bool after_dot = false; bool after_dot = false;
ArrayBuildState *astate = NULL;
qualname = PG_GETARG_TEXT_PP(0);
qualname_str = text_to_cstring(qualname);
strict = PG_GETARG_BOOL(1);
/*
* The code below scribbles on qualname_str in some cases, so we should
* reconvert qualname if we need to show the original string in error
* messages.
*/
nextp = qualname_str; nextp = qualname_str;
/* skip leading whitespace */ /* skip leading whitespace */
...@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS) ...@@ -830,25 +782,24 @@ parse_ident(PG_FUNCTION_ARGS)
for (;;) for (;;)
{ {
char *curname; char *curname;
char *endp; bool missing_ident = true;
bool missing_ident;
missing_ident = true;
if (*nextp == '\"') if (*nextp == '"')
{ {
char *endp;
curname = nextp + 1; curname = nextp + 1;
for (;;) for (;;)
{ {
endp = strchr(nextp + 1, '\"'); endp = strchr(nextp + 1, '"');
if (endp == NULL) if (endp == NULL)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unclosed double quotes"), errmsg("string is not a valid identifier: \"%s\"",
errdetail("string %s is not valid identifier", text_to_cstring(qualname)),
sanitize_text(qualname)))); errdetail("String has unclosed double quotes.")));
if (endp[1] != '\"') if (endp[1] != '"')
break; break;
memmove(endp, endp + 1, strlen(endp)); memmove(endp, endp + 1, strlen(endp));
nextp = endp; nextp = endp;
...@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS) ...@@ -856,44 +807,40 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = endp + 1; nextp = endp + 1;
*endp = '\0'; *endp = '\0';
/* Show complete input string in this case. */
if (endp - curname == 0) if (endp - curname == 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier should not be empty: %s", errmsg("string is not a valid identifier: \"%s\"",
sanitize_text(qualname)))); text_to_cstring(qualname)),
errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname), astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext); false, TEXTOID, CurrentMemoryContext);
missing_ident = false; missing_ident = false;
} }
else else if (is_ident_start((unsigned char) *nextp))
{ {
if (is_ident_start((unsigned char) *nextp)) char *downname;
{ int len;
char *downname; text *part;
int len;
text *part; curname = nextp++;
while (is_ident_cont((unsigned char) *nextp))
curname = nextp++; nextp++;
while (is_ident_cont((unsigned char) *nextp))
nextp++; len = nextp - curname;
len = nextp - curname; /*
* We don't implicitly truncate identifiers. This is useful for
/* * allowing the user to check for specific parts of the identifier
* Unlike name, we don't implicitly truncate identifiers. This * being too long. It's easy enough for the user to get the
* is useful for allowing the user to check for specific parts * truncated names by casting our output to name[].
* of the identifier being too long. It's easy enough for the */
* user to get the truncated names by casting our output to downname = downcase_identifier(curname, len, false, false);
* name[]. part = cstring_to_text_with_len(downname, len);
*/ astate = accumArrayResult(astate, PointerGetDatum(part), false,
downname = downcase_identifier(curname, len, false, false); TEXTOID, CurrentMemoryContext);
part = cstring_to_text_with_len(downname, len); missing_ident = false;
astate = accumArrayResult(astate, PointerGetDatum(part), false,
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
} }
if (missing_ident) if (missing_ident)
...@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS) ...@@ -901,19 +848,21 @@ parse_ident(PG_FUNCTION_ARGS)
/* Different error messages based on where we failed. */ /* Different error messages based on where we failed. */
if (*nextp == '.') if (*nextp == '.')
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier before \".\" symbol: %s", errmsg("string is not a valid identifier: \"%s\"",
sanitize_text(qualname)))); text_to_cstring(qualname)),
errdetail("No valid identifier before \".\" symbol.")));
else if (after_dot) else if (after_dot)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier after \".\" symbol: %s", errmsg("string is not a valid identifier: \"%s\"",
sanitize_text(qualname)))); text_to_cstring(qualname)),
errdetail("No valid identifier after \".\" symbol.")));
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier: %s", errmsg("string is not a valid identifier: \"%s\"",
sanitize_text(qualname)))); text_to_cstring(qualname))));
} }
while (isspace((unsigned char) *nextp)) while (isspace((unsigned char) *nextp))
...@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS) ...@@ -934,9 +883,9 @@ parse_ident(PG_FUNCTION_ARGS)
{ {
if (strict) if (strict)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier contains disallowed characters: %s", errmsg("string is not a valid identifier: \"%s\"",
sanitize_text(qualname)))); text_to_cstring(qualname))));
break; break;
} }
} }
......
...@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo'); ...@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
(1 row) (1 row)
SELECT parse_ident('foo.boo[]'); -- should fail SELECT parse_ident('foo.boo[]'); -- should fail
ERROR: identifier contains disallowed characters: "foo.boo[]" ERROR: string is not a valid identifier: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident parse_ident
------------- -------------
...@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok ...@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
-- should fail -- should fail
SELECT parse_ident(' '); SELECT parse_ident(' ');
ERROR: missing valid identifier: " " ERROR: string is not a valid identifier: " "
SELECT parse_ident(' .aaa'); SELECT parse_ident(' .aaa');
ERROR: missing valid identifier before "." symbol: " .aaa" ERROR: string is not a valid identifier: " .aaa"
DETAIL: No valid identifier before "." symbol.
SELECT parse_ident(' aaa . '); SELECT parse_ident(' aaa . ');
ERROR: missing valid identifier after "." symbol: " aaa . " ERROR: string is not a valid identifier: " aaa . "
DETAIL: No valid identifier after "." symbol.
SELECT parse_ident('aaa.a%b'); SELECT parse_ident('aaa.a%b');
ERROR: identifier contains disallowed characters: "aaa.a%b" ERROR: string is not a valid identifier: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" ERROR: string is not a valid identifier: "X XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ; SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length length | length
--------+-------- --------+--------
...@@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) ...@@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66)
(1 row) (1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX'); SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX" ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX"
SELECT parse_ident('1020'); SELECT parse_ident('1020');
ERROR: missing valid identifier: "1020" ERROR: string is not a valid identifier: "1020"
SELECT parse_ident('10.20'); SELECT parse_ident('10.20');
ERROR: missing valid identifier: "10.20" ERROR: string is not a valid identifier: "10.20"
SELECT parse_ident('.'); SELECT parse_ident('.');
ERROR: missing valid identifier before "." symbol: "." ERROR: string is not a valid identifier: "."
DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('.1020'); SELECT parse_ident('.1020');
ERROR: missing valid identifier before "." symbol: ".1020" ERROR: string is not a valid identifier: ".1020"
DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('xxx.1020'); SELECT parse_ident('xxx.1020');
ERROR: missing valid identifier after "." symbol: "xxx.1020" ERROR: string is not a valid identifier: "xxx.1020"
DETAIL: No valid identifier after "." symbol.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment