Commit 3187d6de authored by Teodor Sigaev's avatar Teodor Sigaev

Introduce parse_ident()

SQL-layer function to split qualified identifier into array parts.

Author: Pavel Stehule with minor editorization by me and Jim Nasby
parent 992b5ba3
...@@ -1818,6 +1818,32 @@ ...@@ -1818,6 +1818,32 @@
<entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry> <entry><literal>900150983cd24fb0 d6963f7d28e17f72</literal></entry>
</row> </row>
<row>
<entry>
<indexterm>
<primary>parse_ident</primary>
</indexterm>
<literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
[ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
</entry>
<entry><type>text[]</type></entry>
<entry>Split <parameter>qualified identifier</parameter> into array
<parameter>parts</parameter>. When <parameter>strictmode</parameter> is
false, extra characters after the identifier are ignored. This is useful
for parsing identifiers for objects like functions and arrays that may
have trailing characters. By default, extra characters after the last
identifier are considered an error, but if second parameter is false,
then chararacters after last identifier are ignored. Note that this
function does not truncate quoted identifiers. If you care about that
you should cast the result of this function to name[]. A non-printable
chararacters (like 0 to 31) are displayed as hexadecimal codes always,
what can be different from PostgreSQL internal SQL identifiers
processing, when the original escaped value is displayed.
</entry>
<entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
<entry><literal>"SomeSchema,sometable"</literal></entry>
</row>
<row> <row>
<entry> <entry>
<indexterm> <indexterm>
......
...@@ -990,3 +990,10 @@ RETURNS jsonb ...@@ -990,3 +990,10 @@ RETURNS jsonb
LANGUAGE INTERNAL LANGUAGE INTERNAL
STRICT IMMUTABLE STRICT IMMUTABLE
AS 'jsonb_set'; AS 'jsonb_set';
CREATE OR REPLACE FUNCTION
parse_ident(str text, strict boolean DEFAULT true)
RETURNS text[]
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'parse_ident';
...@@ -129,6 +129,15 @@ scanstr(const char *s) ...@@ -129,6 +129,15 @@ scanstr(const char *s)
*/ */
char * char *
downcase_truncate_identifier(const char *ident, int len, bool warn) downcase_truncate_identifier(const char *ident, int len, bool warn)
{
return downcase_identifier(ident, len, warn, true);
}
/*
* a workhorse for downcase_truncate_identifier
*/
char *
downcase_identifier(const char *ident, int len, bool warn, bool truncate)
{ {
char *result; char *result;
int i; int i;
...@@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn) ...@@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
} }
result[i] = '\0'; result[i] = '\0';
if (i >= NAMEDATALEN) if (i >= NAMEDATALEN && truncate)
truncate_identifier(result, i, warn); truncate_identifier(result, i, warn);
return result; return result;
} }
/* /*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes. * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
* *
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "commands/dbcommands.h" #include "commands/dbcommands.h"
#include "funcapi.h" #include "funcapi.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "parser/scansup.h"
#include "parser/keywords.h" #include "parser/keywords.h"
#include "postmaster/syslogger.h" #include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h" #include "rewrite/rewriteHandler.h"
...@@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) ...@@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS); PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
} }
/*
* This simple parser utility are compatible with lexer implementation,
* used only in parse_ident function
*/
static bool
is_ident_start(unsigned char c)
{
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
if (c >= 0200 && c <= 0377)
return true;
return false;
}
static bool
is_ident_cont(unsigned char c)
{
if (c >= '0' && c <= '9')
return true;
return is_ident_start(c);
}
/*
* Sanitize SQL string for using in error message.
*/
static char *
sanitize_text(text *t)
{
int len = VARSIZE_ANY_EXHDR(t);
const char *p = VARDATA_ANY(t);
StringInfo dstr;
dstr = makeStringInfo();
appendStringInfoChar(dstr, '"');
while (len--)
{
switch (*p)
{
case '\b':
appendStringInfoString(dstr, "\\b");
break;
case '\f':
appendStringInfoString(dstr, "\\f");
break;
case '\n':
appendStringInfoString(dstr, "\\n");
break;
case '\r':
appendStringInfoString(dstr, "\\r");
break;
case '\t':
appendStringInfoString(dstr, "\\t");
break;
case '\'':
appendStringInfoString(dstr, "''");
break;
case '\\':
appendStringInfoString(dstr, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
appendStringInfo(dstr, "\\u%04x", (int) *p);
else
appendStringInfoCharMacro(dstr, *p);
break;
}
p++;
}
appendStringInfoChar(dstr, '"');
return dstr->data;
}
/*
* parse_ident - parse SQL composed identifier to separate identifiers.
* When strict mode is active (second parameter), then any chars after
* last identifiers are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
text *qualname;
char *qualname_str;
bool strict;
char *nextp;
bool after_dot = false;
ArrayBuildState *astate = NULL;
qualname = PG_GETARG_TEXT_PP(0);
qualname_str = text_to_cstring(qualname);
strict = PG_GETARG_BOOL(1);
nextp = qualname_str;
/* skip leading whitespace */
while (isspace((unsigned char) *nextp))
nextp++;
for (;;)
{
char *curname;
char *endp;
bool missing_ident;
missing_ident = true;
if (*nextp == '\"')
{
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '\"');
if (endp == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unclosed double quotes"),
errdetail("string %s is not valid identifier",
sanitize_text(qualname))));
if (endp[1] != '\"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
}
nextp = endp + 1;
*endp = '\0';
/* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier should not be empty: %s",
sanitize_text(qualname))));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
else
{
if (is_ident_start((unsigned char) *nextp))
{
char *downname;
int len;
text *part;
curname = nextp++;
while (is_ident_cont((unsigned char) *nextp))
nextp++;
len = nextp - curname;
/*
* Unlike name, we don't implicitly truncate identifiers. This
* is useful for allowing the user to check for specific parts
* of the identifier being too long. It's easy enough for the
* user to get the truncated names by casting our output to
* name[].
*/
downname = downcase_identifier(curname, len, false, false);
part = cstring_to_text_with_len(downname, len);
astate = accumArrayResult(astate, PointerGetDatum(part), false,
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
}
if (missing_ident)
{
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier before \".\" symbol: %s",
sanitize_text(qualname))));
else if (after_dot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier after \".\" symbol: %s",
sanitize_text(qualname))));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier: %s",
sanitize_text(qualname))));
}
while (isspace((unsigned char) *nextp))
nextp++;
if (*nextp == '.')
{
after_dot = true;
nextp++;
while (isspace((unsigned char) *nextp))
nextp++;
}
else if (*nextp == '\0')
{
break;
}
else
{
if (strict)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier contains disallowed characters: %s",
sanitize_text(qualname))));
break;
}
}
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201603151 #define CATALOG_VERSION_NO 201603181
#endif #endif
...@@ -3463,6 +3463,9 @@ DESCR("I/O"); ...@@ -3463,6 +3463,9 @@ DESCR("I/O");
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ )); DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
DESCR("convert namespace name to regnamespace"); DESCR("convert namespace name to regnamespace");
DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
DESCR("parse qualified identifier to array of identifiers");
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ )); DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
DESCR("(internal)"); DESCR("(internal)");
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ )); DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));
......
...@@ -20,6 +20,9 @@ extern char *scanstr(const char *s); ...@@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
extern char *downcase_truncate_identifier(const char *ident, int len, extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn); bool warn);
extern char *downcase_identifier(const char *ident, int len,
bool warn, bool truncate);
extern void truncate_identifier(char *ident, int len, bool warn); extern void truncate_identifier(char *ident, int len, bool warn);
extern bool scanner_isspace(char ch); extern bool scanner_isspace(char ch);
......
...@@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS); ...@@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
extern Datum pg_collation_for(PG_FUNCTION_ARGS); extern Datum pg_collation_for(PG_FUNCTION_ARGS);
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS); extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS); extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
extern Datum parse_ident(PG_FUNCTION_ARGS);
/* oid.c */ /* oid.c */
extern Datum oidin(PG_FUNCTION_ARGS); extern Datum oidin(PG_FUNCTION_ARGS);
......
...@@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; ...@@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
(2 rows) (2 rows)
DROP TABLE NAME_TBL; DROP TABLE NAME_TBL;
DO $$
DECLARE r text[];
BEGIN
r := parse_ident('Schemax.Tabley');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
r := parse_ident('"SchemaX"."TableY"');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
END;
$$;
NOTICE: schemax.tabley
NOTICE: "SchemaX"."TableY"
SELECT parse_ident('foo.boo');
parse_ident
-------------
{foo,boo}
(1 row)
SELECT parse_ident('foo.boo[]'); -- should fail
ERROR: identifier contains disallowed characters: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident
-------------
{foo,boo}
(1 row)
-- should fail
SELECT parse_ident(' ');
ERROR: missing valid identifier: " "
SELECT parse_ident(' .aaa');
ERROR: missing valid identifier before "." symbol: " .aaa"
SELECT parse_ident(' aaa . ');
ERROR: missing valid identifier after "." symbol: " aaa . "
SELECT parse_ident('aaa.a%b');
ERROR: identifier contains disallowed characters: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length
--------+--------
414 | 289
(1 row)
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
parse_ident
-----------------------------------------------------------------------------------------------------------
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
(1 row)
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
parse_ident
------------------------------------------------------------------------------------------------------
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
(1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
SELECT parse_ident('1020');
ERROR: missing valid identifier: "1020"
SELECT parse_ident('10.20');
ERROR: missing valid identifier: "10.20"
SELECT parse_ident('.');
ERROR: missing valid identifier before "." symbol: "."
SELECT parse_ident('.1020');
ERROR: missing valid identifier before "." symbol: ".1020"
SELECT parse_ident('xxx.1020');
ERROR: missing valid identifier after "." symbol: "xxx.1020"
...@@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]'; ...@@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]';
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
DROP TABLE NAME_TBL; DROP TABLE NAME_TBL;
DO $$
DECLARE r text[];
BEGIN
r := parse_ident('Schemax.Tabley');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
r := parse_ident('"SchemaX"."TableY"');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
END;
$$;
SELECT parse_ident('foo.boo');
SELECT parse_ident('foo.boo[]'); -- should fail
SELECT parse_ident('foo.boo[]', strict => false); -- ok
-- should fail
SELECT parse_ident(' ');
SELECT parse_ident(' .aaa');
SELECT parse_ident(' aaa . ');
SELECT parse_ident('aaa.a%b');
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
SELECT parse_ident('1020');
SELECT parse_ident('10.20');
SELECT parse_ident('.');
SELECT parse_ident('.1020');
SELECT parse_ident('xxx.1020');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment