Commit d12e5bb7 authored by Tom Lane

Code and docs review for commit 3187d6de.

Fix up check for high-bit-set characters, which provoked "comparison is
always true due to limited range of data type" warnings on some compilers,
and was unlike the way we do it elsewhere anyway.  Fix omission of "$"
from the set of valid identifier continuation characters.  Get rid of
sanitize_text(), which was utterly inconsistent with any other error report
anywhere in the system, and wasn't even well designed on its own terms
(double-quoting the result string without escaping contained double quotes
doesn't seem very well thought out).  Fix up error messages, which didn't
follow the message style guidelines very well, and were overly specific in
situations where the actual mistake might not be what they said.  Improve
documentation.

(I started out just intending to fix the compiler warning, but the more
I looked at the patch the less I liked it.)
parent 499a5057
......@@ -1823,25 +1823,22 @@
<indexterm>
<primary>parse_ident</primary>
</indexterm>
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
<literal><function>parse_ident(<parameter>qualified_identifier</parameter> <type>text</type>
[, <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
</entry>
<entry><type>text[]</type></entry>
<entry>Split <parameter>qualified identifier</parameter> into array
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
false, extra characters after the identifier are ignored. This is useful
for parsing identifiers for objects like functions and arrays that may
have trailing characters. By default, extra characters after the last
identifier are considered an error, but if the second parameter is false,
then the characters after the last identifier are ignored. Note that this
function does not truncate quoted identifiers. If you care about that
you should cast the result of this function to name[]. Non-printable
characters (like 0 to 31) are always displayed as hexadecimal codes,
which can be different from PostgreSQL internal SQL identifiers
processing, when the original escaped value is displayed.
<entry>
Split <parameter>qualified_identifier</parameter> into an array of
identifiers, removing any quoting of individual identifiers. By
default, extra characters after the last identifier are considered an
error; but if the second parameter is <literal>false</>, then such
extra characters are ignored. (This behavior is useful for parsing
names for objects like functions.) Note that this function does not
truncate over-length identifiers. If you want truncation you can cast
the result to <type>name[]</>.
</entry>
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
<entry><literal>"SomeSchema,sometable"</literal></entry>
<entry><literal>{SomeSchema,sometable}</literal></entry>
</row>
<row>
......
......@@ -723,105 +723,57 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
/*
 * Is character a valid identifier start?
 * Must match scan.l's {ident_start} character class.
 *
 * Returns true for an underscore, an ASCII letter, or any byte with the
 * high bit set; false for everything else.
 */
static bool
is_ident_start(unsigned char c)
{
	/* Underscores and ASCII letters are OK */
	if (c == '_')
		return true;
	if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
		return true;

	/*
	 * Any high-bit-set character is OK (might be part of a multibyte char).
	 * Use IS_HIGHBIT_SET rather than an explicit 0200..0377 range test: the
	 * upper bound of such a test is always true for unsigned char, which
	 * makes some compilers warn about the comparison.
	 */
	if (IS_HIGHBIT_SET(c))
		return true;

	return false;
}
/*
 * Is character a valid identifier continuation?
 * Must match scan.l's {ident_cont} character class.
 *
 * Returns true for a decimal digit, a dollar sign, or any character that
 * is_ident_start() accepts.
 */
static bool
is_ident_cont(unsigned char c)
{
	/*
	 * Can be digit or dollar sign ...  (this must be a single test: guarding
	 * it with a digits-only check would make '$' unreachable)
	 */
	if ((c >= '0' && c <= '9') || c == '$')
		return true;

	/* ... or an identifier start character */
	return is_ident_start(c);
}
/*
 * Produce a printable copy of a text value for use in an error message.
 *
 * The result is wrapped in double quotes.  Backspace, form feed, newline,
 * carriage return, tab and backslash are written as backslash escapes, a
 * single quote is doubled, and any other byte below ' ' is written as
 * \uXXXX.  All remaining bytes are copied unchanged.  The returned pointer
 * is the data buffer of a StringInfo created here.
 */
static char *
sanitize_text(text *t)
{
	int			remaining = VARSIZE_ANY_EXHDR(t);
	const char *cur = VARDATA_ANY(t);
	StringInfo	out = makeStringInfo();

	appendStringInfoChar(out, '"');

	for (; remaining > 0; remaining--, cur++)
	{
		const char	ch = *cur;
		const char *escape = NULL;

		/* Look up a fixed replacement for the characters that have one */
		switch (ch)
		{
			case '\b':
				escape = "\\b";
				break;
			case '\f':
				escape = "\\f";
				break;
			case '\n':
				escape = "\\n";
				break;
			case '\r':
				escape = "\\r";
				break;
			case '\t':
				escape = "\\t";
				break;
			case '\'':
				escape = "''";
				break;
			case '\\':
				escape = "\\\\";
				break;
			default:
				break;
		}

		if (escape != NULL)
			appendStringInfoString(out, escape);
		else if ((unsigned char) ch < ' ')
			appendStringInfo(out, "\\u%04x", (int) ch);	/* other control bytes */
		else
			appendStringInfoCharMacro(out, ch);
	}

	appendStringInfoChar(out, '"');

	return out->data;
}
/*
* parse_ident - parse SQL composed identifier to separate identifiers.
* parse_ident - parse a SQL qualified identifier into separate identifiers.
* When strict mode is active (second parameter), then any chars after
* last identifiers are disallowed.
* the last identifier are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
text *qualname;
char *qualname_str;
bool strict;
text *qualname = PG_GETARG_TEXT_PP(0);
bool strict = PG_GETARG_BOOL(1);
char *qualname_str = text_to_cstring(qualname);
ArrayBuildState *astate = NULL;
char *nextp;
bool after_dot = false;
ArrayBuildState *astate = NULL;
qualname = PG_GETARG_TEXT_PP(0);
qualname_str = text_to_cstring(qualname);
strict = PG_GETARG_BOOL(1);
/*
* The code below scribbles on qualname_str in some cases, so we should
* reconvert qualname if we need to show the original string in error
* messages.
*/
nextp = qualname_str;
/* skip leading whitespace */
......@@ -831,24 +783,23 @@ parse_ident(PG_FUNCTION_ARGS)
for (;;)
{
char *curname;
char *endp;
bool missing_ident;
missing_ident = true;
bool missing_ident = true;
if (*nextp == '\"')
if (*nextp == '"')
{
char *endp;
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '\"');
endp = strchr(nextp + 1, '"');
if (endp == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unclosed double quotes"),
errdetail("string %s is not valid identifier",
sanitize_text(qualname))));
if (endp[1] != '\"')
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("String has unclosed double quotes.")));
if (endp[1] != '"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
......@@ -856,20 +807,18 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = endp + 1;
*endp = '\0';
/* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier should not be empty: %s",
sanitize_text(qualname))));
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("Quoted identifier must not be empty.")));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
else
{
if (is_ident_start((unsigned char) *nextp))
else if (is_ident_start((unsigned char) *nextp))
{
char *downname;
int len;
......@@ -882,11 +831,10 @@ parse_ident(PG_FUNCTION_ARGS)
len = nextp - curname;
/*
* Unlike name, we don't implicitly truncate identifiers. This
* is useful for allowing the user to check for specific parts
* of the identifier being too long. It's easy enough for the
* user to get the truncated names by casting our output to
* name[].
* We don't implicitly truncate identifiers. This is useful for
* allowing the user to check for specific parts of the identifier
* being too long. It's easy enough for the user to get the
* truncated names by casting our output to name[].
*/
downname = downcase_identifier(curname, len, false, false);
part = cstring_to_text_with_len(downname, len);
......@@ -894,7 +842,6 @@ parse_ident(PG_FUNCTION_ARGS)
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
}
if (missing_ident)
{
......@@ -902,18 +849,20 @@ parse_ident(PG_FUNCTION_ARGS)
if (*nextp == '.')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier before \".\" symbol: %s",
sanitize_text(qualname))));
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("No valid identifier before \".\" symbol.")));
else if (after_dot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier after \".\" symbol: %s",
sanitize_text(qualname))));
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname)),
errdetail("No valid identifier after \".\" symbol.")));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier: %s",
sanitize_text(qualname))));
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname))));
}
while (isspace((unsigned char) *nextp))
......@@ -935,8 +884,8 @@ parse_ident(PG_FUNCTION_ARGS)
if (strict)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier contains disallowed characters: %s",
sanitize_text(qualname))));
errmsg("string is not a valid identifier: \"%s\"",
text_to_cstring(qualname))));
break;
}
}
......
......@@ -142,7 +142,7 @@ SELECT parse_ident('foo.boo');
(1 row)
SELECT parse_ident('foo.boo[]'); -- should fail
ERROR: identifier contains disallowed characters: "foo.boo[]"
ERROR: string is not a valid identifier: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident
-------------
......@@ -151,15 +151,17 @@ SELECT parse_ident('foo.boo[]', strict => false); -- ok
-- should fail
SELECT parse_ident(' ');
ERROR: missing valid identifier: " "
ERROR: string is not a valid identifier: " "
SELECT parse_ident(' .aaa');
ERROR: missing valid identifier before "." symbol: " .aaa"
ERROR: string is not a valid identifier: " .aaa"
DETAIL: No valid identifier before "." symbol.
SELECT parse_ident(' aaa . ');
ERROR: missing valid identifier after "." symbol: " aaa . "
ERROR: string is not a valid identifier: " aaa . "
DETAIL: No valid identifier after "." symbol.
SELECT parse_ident('aaa.a%b');
ERROR: identifier contains disallowed characters: "aaa.a%b"
ERROR: string is not a valid identifier: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
ERROR: string is not a valid identifier: "X XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length
--------+--------
......@@ -179,14 +181,17 @@ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66)
(1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
ERROR: string is not a valid identifier: ""c".X XXXXXXXXXX"
SELECT parse_ident('1020');
ERROR: missing valid identifier: "1020"
ERROR: string is not a valid identifier: "1020"
SELECT parse_ident('10.20');
ERROR: missing valid identifier: "10.20"
ERROR: string is not a valid identifier: "10.20"
SELECT parse_ident('.');
ERROR: missing valid identifier before "." symbol: "."
ERROR: string is not a valid identifier: "."
DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('.1020');
ERROR: missing valid identifier before "." symbol: ".1020"
ERROR: string is not a valid identifier: ".1020"
DETAIL: No valid identifier before "." symbol.
SELECT parse_ident('xxx.1020');
ERROR: missing valid identifier after "." symbol: "xxx.1020"
ERROR: string is not a valid identifier: "xxx.1020"
DETAIL: No valid identifier after "." symbol.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment