Commit 17ca0679 authored by Tom Lane

Clean up parsing of ltree and lquery some more.

Fix lquery parsing to handle repeated flag characters correctly,
and to enforce the max label length correctly in some cases where
it did not before, and to detect empty labels in some cases where
it did not before.

In a more cosmetic vein, use a switch rather than if-then chains to
handle the different states, and avoid unnecessary checks on charlen
when looking for ASCII characters, and factor out multiple copies of
the label length checking code.

Tom Lane and Dmitry Belyavsky

Discussion: https://postgr.es/m/CADqLbzLVkBuPX0812o+z=c3i6honszsZZ6VQOSKR3VPbB56P3w@mail.gmail.com
parent 949a9f04
......@@ -31,6 +31,29 @@ SELECT '1.2._3'::ltree;
1.2._3
(1 row)
-- empty labels not allowed
SELECT '.2.3'::ltree;
ERROR: ltree syntax error at character 1
LINE 1: SELECT '.2.3'::ltree;
^
SELECT '1..3'::ltree;
ERROR: ltree syntax error at character 3
LINE 1: SELECT '1..3'::ltree;
^
SELECT '1.2.'::ltree;
ERROR: ltree syntax error
LINE 1: SELECT '1.2.'::ltree;
^
DETAIL: Unexpected end of input.
SELECT repeat('x', 255)::ltree;
repeat
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
(1 row)
SELECT repeat('x', 256)::ltree;
ERROR: label string is too long
DETAIL: Label length is 256, must be at most 255, at character 257.
SELECT ltree2text('1.2.3.34.sdf');
ltree2text
--------------
......@@ -451,12 +474,81 @@ SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}
(1 row)
SELECT 'foo*@@*'::lquery;
lquery
--------
foo@*
(1 row)
SELECT 'qwerty%@*.tu'::lquery;
lquery
--------------
qwerty%@*.tu
(1 row)
-- empty labels not allowed
SELECT '.2.3'::lquery;
ERROR: lquery syntax error at character 1
LINE 1: SELECT '.2.3'::lquery;
^
SELECT '1..3'::lquery;
ERROR: lquery syntax error at character 3
LINE 1: SELECT '1..3'::lquery;
^
SELECT '1.2.'::lquery;
ERROR: lquery syntax error
LINE 1: SELECT '1.2.'::lquery;
^
DETAIL: Unexpected end of input.
SELECT '@.2.3'::lquery;
ERROR: lquery syntax error at character 1
LINE 1: SELECT '@.2.3'::lquery;
^
SELECT '1.@.3'::lquery;
ERROR: lquery syntax error at character 3
LINE 1: SELECT '1.@.3'::lquery;
^
SELECT '1.2.@'::lquery;
ERROR: lquery syntax error at character 5
LINE 1: SELECT '1.2.@'::lquery;
^
SELECT '!.2.3'::lquery;
ERROR: lquery syntax error at character 2
LINE 1: SELECT '!.2.3'::lquery;
^
DETAIL: Empty labels are not allowed.
SELECT '1.!.3'::lquery;
ERROR: lquery syntax error at character 4
LINE 1: SELECT '1.!.3'::lquery;
^
DETAIL: Empty labels are not allowed.
SELECT '1.2.!'::lquery;
ERROR: lquery syntax error at character 6
LINE 1: SELECT '1.2.!'::lquery;
^
DETAIL: Empty labels are not allowed.
SELECT '1.2.3|@.4'::lquery;
ERROR: lquery syntax error at character 7
LINE 1: SELECT '1.2.3|@.4'::lquery;
^
SELECT (repeat('x', 255) || '*@@*')::lquery;
lquery
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@*
(1 row)
SELECT (repeat('x', 256) || '*@@*')::lquery;
ERROR: label string is too long
DETAIL: Label length is 256, must be at most 255, at character 257.
SELECT ('!' || repeat('x', 255))::lquery;
lquery
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
!xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
(1 row)
SELECT ('!' || repeat('x', 256))::lquery;
ERROR: label string is too long
DETAIL: Label length is 256, must be at most 255, at character 258.
SELECT nlevel('1.2.3.4');
nlevel
--------
......@@ -1072,6 +1164,12 @@ SELECT 'QWER_TY'::ltree ~ 'q%@*';
t
(1 row)
SELECT 'QWER_TY'::ltree ~ 'q%@*%@*';
?column?
----------
t
(1 row)
SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
?column?
----------
......
......@@ -24,6 +24,10 @@ typedef struct
#define LTPRS_WAITNAME 0
#define LTPRS_WAITDELIM 1
static void finish_nodeitem(nodeitem *lptr, const char *ptr,
bool is_lquery, int pos);
/*
* expects a null terminated string
* returns an ltree
......@@ -51,7 +55,7 @@ parse_ltree(const char *buf)
while (*ptr)
{
charlen = pg_mblen(ptr);
if (charlen == 1 && t_iseq(ptr, '.'))
if (t_iseq(ptr, '.'))
num++;
ptr += charlen;
}
......@@ -67,8 +71,9 @@ parse_ltree(const char *buf)
{
charlen = pg_mblen(ptr);
if (state == LTPRS_WAITNAME)
switch (state)
{
case LTPRS_WAITNAME:
if (ISALNUM(ptr))
{
lptr->start = ptr;
......@@ -77,30 +82,21 @@ parse_ltree(const char *buf)
}
else
UNCHAR;
}
else if (state == LTPRS_WAITDELIM)
{
if (charlen == 1 && t_iseq(ptr, '.'))
break;
case LTPRS_WAITDELIM:
if (t_iseq(ptr, '.'))
{
lptr->len = ptr - lptr->start;
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
finish_nodeitem(lptr, ptr, false, pos);
totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
lptr++;
state = LTPRS_WAITNAME;
}
else if (!ISALNUM(ptr))
UNCHAR;
break;
default:
elog(ERROR, "internal error in ltree parser");
}
else
/* internal error */
elog(ERROR, "internal error in parser");
ptr += charlen;
lptr->wlen++;
......@@ -109,14 +105,7 @@ parse_ltree(const char *buf)
if (state == LTPRS_WAITDELIM)
{
lptr->len = ptr - lptr->start;
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
finish_nodeitem(lptr, ptr, false, pos);
totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
lptr++;
}
......@@ -298,13 +287,10 @@ parse_lquery(const char *buf)
{
charlen = pg_mblen(ptr);
if (charlen == 1)
{
if (t_iseq(ptr, '.'))
num++;
else if (t_iseq(ptr, '|'))
numOR++;
}
ptr += charlen;
}
......@@ -321,8 +307,9 @@ parse_lquery(const char *buf)
{
charlen = pg_mblen(ptr);
if (state == LQPRS_WAITLEVEL)
switch (state)
{
case LQPRS_WAITLEVEL:
if (ISALNUM(ptr))
{
GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
......@@ -330,22 +317,22 @@ parse_lquery(const char *buf)
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
}
else if (charlen == 1 && t_iseq(ptr, '!'))
else if (t_iseq(ptr, '!'))
{
GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
lptr->start = ptr + 1;
lptr->wlen = -1; /* compensate for counting ! below */
state = LQPRS_WAITDELIM;
curqlevel->numvar = 1;
curqlevel->flag |= LQL_NOT;
hasnot = true;
}
else if (charlen == 1 && t_iseq(ptr, '*'))
else if (t_iseq(ptr, '*'))
state = LQPRS_WAITOPEN;
else
UNCHAR;
}
else if (state == LQPRS_WAITVAR)
{
break;
case LQPRS_WAITVAR:
if (ISALNUM(ptr))
{
lptr++;
......@@ -355,93 +342,53 @@ parse_lquery(const char *buf)
}
else
UNCHAR;
}
else if (state == LQPRS_WAITDELIM)
break;
case LQPRS_WAITDELIM:
if (t_iseq(ptr, '@'))
{
if (charlen == 1 && t_iseq(ptr, '@'))
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_INCASE;
curqlevel->flag |= LVAR_INCASE;
}
else if (charlen == 1 && t_iseq(ptr, '*'))
else if (t_iseq(ptr, '*'))
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_ANYEND;
curqlevel->flag |= LVAR_ANYEND;
}
else if (charlen == 1 && t_iseq(ptr, '%'))
else if (t_iseq(ptr, '%'))
{
if (lptr->start == ptr)
UNCHAR;
lptr->flag |= LVAR_SUBLEXEME;
curqlevel->flag |= LVAR_SUBLEXEME;
}
else if (charlen == 1 && t_iseq(ptr, '|'))
else if (t_iseq(ptr, '|'))
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
finish_nodeitem(lptr, ptr, true, pos);
state = LQPRS_WAITVAR;
}
else if (charlen == 1 && t_iseq(ptr, '{'))
else if (t_iseq(ptr, '{'))
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
finish_nodeitem(lptr, ptr, true, pos);
curqlevel->flag |= LQL_COUNT;
state = LQPRS_WAITFNUM;
}
else if (charlen == 1 && t_iseq(ptr, '.'))
else if (t_iseq(ptr, '.'))
{
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS,
pos)));
finish_nodeitem(lptr, ptr, true, pos);
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
}
else if (ISALNUM(ptr))
{
/* disallow more chars after a flag */
if (lptr->flag)
UNCHAR;
}
else
UNCHAR;
}
else if (state == LQPRS_WAITOPEN)
{
if (charlen == 1 && t_iseq(ptr, '{'))
break;
case LQPRS_WAITOPEN:
if (t_iseq(ptr, '{'))
state = LQPRS_WAITFNUM;
else if (charlen == 1 && t_iseq(ptr, '.'))
else if (t_iseq(ptr, '.'))
{
/* We only get here for '*', so these are correct defaults */
curqlevel->low = 0;
......@@ -451,10 +398,9 @@ parse_lquery(const char *buf)
}
else
UNCHAR;
}
else if (state == LQPRS_WAITFNUM)
{
if (charlen == 1 && t_iseq(ptr, ','))
break;
case LQPRS_WAITFNUM:
if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
else if (t_isdigit(ptr))
{
......@@ -472,9 +418,8 @@ parse_lquery(const char *buf)
}
else
UNCHAR;
}
else if (state == LQPRS_WAITSNUM)
{
break;
case LQPRS_WAITSNUM:
if (t_isdigit(ptr))
{
int high = atoi(ptr);
......@@ -495,46 +440,43 @@ parse_lquery(const char *buf)
curqlevel->high = (uint16) high;
state = LQPRS_WAITCLOSE;
}
else if (charlen == 1 && t_iseq(ptr, '}'))
else if (t_iseq(ptr, '}'))
{
curqlevel->high = LTREE_MAX_LEVELS;
state = LQPRS_WAITEND;
}
else
UNCHAR;
}
else if (state == LQPRS_WAITCLOSE)
{
if (charlen == 1 && t_iseq(ptr, '}'))
break;
case LQPRS_WAITCLOSE:
if (t_iseq(ptr, '}'))
state = LQPRS_WAITEND;
else if (!t_isdigit(ptr))
UNCHAR;
}
else if (state == LQPRS_WAITND)
{
if (charlen == 1 && t_iseq(ptr, '}'))
break;
case LQPRS_WAITND:
if (t_iseq(ptr, '}'))
{
curqlevel->high = curqlevel->low;
state = LQPRS_WAITEND;
}
else if (charlen == 1 && t_iseq(ptr, ','))
else if (t_iseq(ptr, ','))
state = LQPRS_WAITSNUM;
else if (!t_isdigit(ptr))
UNCHAR;
}
else if (state == LQPRS_WAITEND)
{
if (charlen == 1 && t_iseq(ptr, '.'))
break;
case LQPRS_WAITEND:
if (t_iseq(ptr, '.'))
{
state = LQPRS_WAITLEVEL;
curqlevel = NEXTLEV(curqlevel);
}
else
UNCHAR;
break;
default:
elog(ERROR, "internal error in lquery parser");
}
else
/* internal error */
elog(ERROR, "internal error in parser");
ptr += charlen;
if (state == LQPRS_WAITDELIM)
......@@ -543,30 +485,7 @@ parse_lquery(const char *buf)
}
if (state == LQPRS_WAITDELIM)
{
if (lptr->start == ptr)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("lquery syntax error"),
errdetail("Unexpected end of input.")));
lptr->len = ptr - lptr->start -
((lptr->flag & LVAR_SUBLEXEME) ? 1 : 0) -
((lptr->flag & LVAR_INCASE) ? 1 : 0) -
((lptr->flag & LVAR_ANYEND) ? 1 : 0);
if (lptr->len == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("lquery syntax error"),
errdetail("Unexpected end of input.")));
if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
ereport(ERROR,
(errcode(ERRCODE_NAME_TOO_LONG),
errmsg("label string is too long"),
errdetail("Label length is %d, must be at most %d, at character %d.",
lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
}
finish_nodeitem(lptr, ptr, true, pos);
else if (state == LQPRS_WAITOPEN)
curqlevel->high = LTREE_MAX_LEVELS;
else if (state != LQPRS_WAITEND)
......@@ -646,6 +565,46 @@ parse_lquery(const char *buf)
#undef UNCHAR
}
/*
 * Finalize one ltree or lquery nodeitem once its text has been scanned.
 *
 * lptr is the item being closed out, ptr marks the (exclusive) end of the
 * text scanned for it, is_lquery selects lquery rules and error wording,
 * and pos is the position quoted in error messages.
 *
 * For lquery items, trailing flag characters (@, *, %) are excluded from the
 * byte length, from lptr->wlen, and from the reported position.  The byte
 * length is stored in lptr->len.  An error is raised if that length is zero
 * or if lptr->wlen exceeds LTREE_LABEL_MAX_CHARS.
 */
static void
finish_nodeitem(nodeitem *lptr, const char *ptr, bool is_lquery, int pos)
{
	const char *label_end = ptr;
	int			errpos = pos;

	if (is_lquery)
	{
		/*
		 * Step backwards over each trailing flag character, never moving
		 * before the start of the label, and take each one back out of the
		 * length count and the error position.
		 */
		for (; label_end > lptr->start; label_end--)
		{
			if (strchr("@*%", label_end[-1]) == NULL)
				break;
			lptr->wlen--;
			errpos--;
		}
	}

	/* The byte length was not tracked while scanning; derive it here. */
	lptr->len = label_end - lptr->start;

	/* An empty label is a syntax error, worded per input type. */
	if (lptr->len == 0)
	{
		if (is_lquery)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("lquery syntax error at character %d", errpos),
					 errdetail("Empty labels are not allowed.")));
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("ltree syntax error at character %d", errpos),
					 errdetail("Empty labels are not allowed.")));
	}

	/* Reject labels whose length count is over the limit. */
	if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
		ereport(ERROR,
				(errcode(ERRCODE_NAME_TOO_LONG),
				 errmsg("label string is too long"),
				 errdetail("Label length is %d, must be at most %d, at character %d.",
						   lptr->wlen, LTREE_LABEL_MAX_CHARS, errpos)));
}
/*
* expects an lquery
* returns a null terminated string
......
......@@ -10,6 +10,14 @@ SELECT '1'::ltree;
SELECT '1.2'::ltree;
SELECT '1.2._3'::ltree;
-- empty labels not allowed
SELECT '.2.3'::ltree;
SELECT '1..3'::ltree;
SELECT '1.2.'::ltree;
SELECT repeat('x', 255)::ltree;
SELECT repeat('x', 256)::ltree;
SELECT ltree2text('1.2.3.34.sdf');
SELECT text2ltree('1.2.3.34.sdf');
......@@ -88,8 +96,26 @@ SELECT '1.*.4|3|2.*{,4}'::lquery;
SELECT '1.*.4|3|2.*{1,}'::lquery;
SELECT '1.*.4|3|2.*{1}'::lquery;
SELECT 'foo.bar{,}.!a*|b{1,}.c{,44}.d{3,4}'::lquery;
SELECT 'foo*@@*'::lquery;
SELECT 'qwerty%@*.tu'::lquery;
-- empty labels not allowed
SELECT '.2.3'::lquery;
SELECT '1..3'::lquery;
SELECT '1.2.'::lquery;
SELECT '@.2.3'::lquery;
SELECT '1.@.3'::lquery;
SELECT '1.2.@'::lquery;
SELECT '!.2.3'::lquery;
SELECT '1.!.3'::lquery;
SELECT '1.2.!'::lquery;
SELECT '1.2.3|@.4'::lquery;
SELECT (repeat('x', 255) || '*@@*')::lquery;
SELECT (repeat('x', 256) || '*@@*')::lquery;
SELECT ('!' || repeat('x', 255))::lquery;
SELECT ('!' || repeat('x', 256))::lquery;
SELECT nlevel('1.2.3.4');
SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
SELECT nlevel(('1' || repeat('.1', 65535))::ltree);
......@@ -200,6 +226,7 @@ SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!a{2,}';
SELECT 'a.b.c.d.e'::ltree ~ '!c{0,3}.!d{2,}.*';
SELECT 'QWER_TY'::ltree ~ 'q%@*';
SELECT 'QWER_TY'::ltree ~ 'q%@*%@*';
SELECT 'QWER_TY'::ltree ~ 'Q_t%@*';
SELECT 'QWER_GY'::ltree ~ 'q_t%@*';
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment