Commit 78c88767 authored by Peter Eisentraut's avatar Peter Eisentraut

Add current substring regular expression syntax

SQL:1999 had syntax

    SUBSTRING(text FROM pattern FOR escapechar)

but this was replaced in SQL:2003 by the more clear

    SUBSTRING(text SIMILAR pattern ESCAPE escapechar)

but this was never implemented in PostgreSQL.  This patch adds that
new syntax as an alternative in the parser, and updates documentation
and tests to indicate that this is the preferred alternative now.
Reviewed-by: default avatarPavel Stehule <pavel.stehule@gmail.com>
Reviewed-by: default avatarVik Fearing <vik@postgresfriends.org>
Reviewed-by: default avatarFabien COELHO <coelho@cri.ensmp.fr>
Discussion: https://www.postgresql.org/message-id/flat/a15db31c-d0f8-8ce0-9039-578a31758adb%402ndquadrant.com
parent aafefb4d
...@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; ...@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t t
(1 row) (1 row)
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t t
--- ---
t t
......
...@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; ...@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t t
(1 row) (1 row)
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t t
--- ---
t t
......
...@@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t; ...@@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t; SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
SELECT substring('Thomas'::citext from 2) = 'homas' AS t; SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
SELECT substring('Thomas'::citext from '...$') = 'mas' AS t; SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t; SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
SELECT trim(' trim '::citext) = 'trim' AS t; SELECT trim(' trim '::citext) = 'trim' AS t;
SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t; SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;
......
...@@ -2669,15 +2669,21 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue> ...@@ -2669,15 +2669,21 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
<row> <row>
<entry role="func_table_entry"><para role="func_signature"> <entry role="func_table_entry"><para role="func_signature">
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> )
<returnvalue>text</returnvalue>
</para>
<para role="func_signature">
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> ) <function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> )
<returnvalue>text</returnvalue> <returnvalue>text</returnvalue>
</para> </para>
<para> <para>
Extracts substring matching <acronym>SQL</acronym> regular expression; Extracts substring matching <acronym>SQL</acronym> regular expression;
see <xref linkend="functions-similarto-regexp"/>. see <xref linkend="functions-similarto-regexp"/>. The first form has
been specified since SQL:2003; the second form was only in SQL:1999
and should be considered obsolete.
</para> </para>
<para> <para>
<literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal> <literal>substring('Thomas' similar '%#"o_a#"_' escape '#')</literal>
<returnvalue>oma</returnvalue> <returnvalue>oma</returnvalue>
</para></entry> </para></entry>
</row> </row>
...@@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation> ...@@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
The <function>substring</function> function with three parameters The <function>substring</function> function with three parameters
provides extraction of a substring that matches an SQL provides extraction of a substring that matches an SQL
regular expression pattern. The function can be written according regular expression pattern. The function can be written according
to SQL99 syntax: to standard SQL syntax:
<synopsis>
substring(<replaceable>string</replaceable> similar <replaceable>pattern</replaceable> escape <replaceable>escape-character</replaceable>)
</synopsis>
or using the now obsolete SQL:1999 syntax:
<synopsis> <synopsis>
substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable> for <replaceable>escape-character</replaceable>) substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable> for <replaceable>escape-character</replaceable>)
</synopsis> </synopsis>
...@@ -5201,8 +5211,8 @@ substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>, ...@@ -5201,8 +5211,8 @@ substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>,
<para> <para>
Some examples, with <literal>#&quot;</literal> delimiting the return string: Some examples, with <literal>#&quot;</literal> delimiting the return string:
<programlisting> <programlisting>
substring('foobar' from '%#"o_b#"%' for '#') <lineannotation>oob</lineannotation> substring('foobar' similar '%#"o_b#"%' escape '#') <lineannotation>oob</lineannotation>
substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotation> substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</lineannotation>
</programlisting> </programlisting>
</para> </para>
</sect2> </sect2>
......
...@@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text ...@@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text
AS AS
$$SELECT $$SELECT
CASE WHEN $1 IN (1186) /* interval */ CASE WHEN $1 IN (1186) /* interval */
THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#')) THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#'))
ELSE null ELSE null
END$$; END$$;
......
...@@ -14451,7 +14451,27 @@ position_list: ...@@ -14451,7 +14451,27 @@ position_list:
| /*EMPTY*/ { $$ = NIL; } | /*EMPTY*/ { $$ = NIL; }
; ;
/* SUBSTRING() arguments */ /*
* SUBSTRING() arguments
*
* Note that SQL:1999 has both
*
* text FROM int FOR int
*
* and
*
* text FROM pattern FOR escape
*
* In the parser we map them both to a call to the substring() function and
* rely on type resolution to pick the right one.
*
* In SQL:2003, the second variant was changed to
*
* text SIMILAR pattern ESCAPE escape
*
* We could in theory map that to a different function internally, but
* since we still support the SQL:1999 version, we don't.
*/
substr_list: substr_list:
a_expr FROM a_expr FOR a_expr a_expr FROM a_expr FOR a_expr
{ {
...@@ -14483,6 +14503,10 @@ substr_list: ...@@ -14483,6 +14503,10 @@ substr_list:
makeTypeCast($3, makeTypeCast($3,
SystemTypeName("int4"), -1)); SystemTypeName("int4"), -1));
} }
| a_expr SIMILAR a_expr ESCAPE a_expr
{
$$ = list_make3($1, $3, $5);
}
/* /*
* We also want to support generic substring functions that * We also want to support generic substring functions that
* accept the usual generic list of arguments. * accept the usual generic list of arguments.
......
...@@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; ...@@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
(1 row) (1 row)
-- T581 regular expression substring (with SQL's bizarre regexp syntax) -- T581 regular expression substring (with SQL's bizarre regexp syntax)
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
bcd
-----
bcd
(1 row)
-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
bcd bcd
----- -----
...@@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; ...@@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
(1 row) (1 row)
-- No match should return NULL -- No match should return NULL
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True"; SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
True True
------ ------
t t
(1 row) (1 row)
-- Null inputs should return NULL -- Null inputs should return NULL
SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True"; SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
True True
------ ------
t t
(1 row) (1 row)
SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True"; SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
True True
------ ------
t t
(1 row) (1 row)
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
True True
------ ------
t t
(1 row) (1 row)
-- The first and last parts should act non-greedy -- The first and last parts should act non-greedy
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
bcdef bcdef
------- -------
bcdef bcdef
(1 row) (1 row)
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg"; SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
abcdefg abcdefg
--------- ---------
abcdefg abcdefg
(1 row) (1 row)
-- Vertical bar in any part affects only that part -- Vertical bar in any part affects only that part
SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
bcdef bcdef
------- -------
bcdef bcdef
(1 row) (1 row)
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
bcdef bcdef
------- -------
bcdef bcdef
(1 row) (1 row)
SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
bcdef bcdef
------- -------
bcdef bcdef
(1 row) (1 row)
-- Can't have more than two part separators -- Can't have more than two part separators
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error"; SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
ERROR: SQL regular expression may not contain more than two escape-double-quote separators ERROR: SQL regular expression may not contain more than two escape-double-quote separators
CONTEXT: SQL function "substring" statement 1 CONTEXT: SQL function "substring" statement 1
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
bcdefg bcdefg
-------- --------
bcdefg bcdefg
(1 row) (1 row)
SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg"; SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
abcdefg abcdefg
--------- ---------
abcdefg abcdefg
......
...@@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890"; ...@@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456"; SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
-- T581 regular expression substring (with SQL's bizarre regexp syntax) -- T581 regular expression substring (with SQL's bizarre regexp syntax)
SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd"; SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
-- No match should return NULL -- No match should return NULL
SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True"; SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
-- Null inputs should return NULL -- Null inputs should return NULL
SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True"; SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True"; SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True"; SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
-- The first and last parts should act non-greedy -- The first and last parts should act non-greedy
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg"; SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
-- Vertical bar in any part affects only that part -- Vertical bar in any part affects only that part
SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
-- Can't have more than two part separators -- Can't have more than two part separators
SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error"; SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty -- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg"; SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg"; SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
-- substring() with just two arguments is not allowed by SQL spec; -- substring() with just two arguments is not allowed by SQL spec;
-- we accept it, but we interpret the pattern as a POSIX regexp not SQL -- we accept it, but we interpret the pattern as a POSIX regexp not SQL
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment