Commit dbde97cd authored by Tom Lane's avatar Tom Lane

Rewrite LIKE's %-followed-by-_ optimization so it really works (this time

for sure ;-)).  It now also optimizes more cases, such as %_%_.  Improve
comments too.  Per bug #5478.

In passing, also rename the TCHAR macro to GETCHAR, because pgindent is
messing with the formatting of the former (apparently it now thinks TCHAR
is a typedef name).

Back-patch to 8.3, where the bug was introduced.
parent e54b0cba
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
* NextChar * NextChar
* MatchText - to name of function wanted * MatchText - to name of function wanted
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
* MATCH_LOWER - define iff using to_lower on text chars * MATCH_LOWER - define for case (4), using to_lower on single-byte chars
* *
* Copyright (c) 1996-2010, PostgreSQL Global Development Group * Copyright (c) 1996-2010, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.27 2010/01/02 16:57:54 momjian Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.28 2010/05/28 17:35:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -70,9 +70,9 @@ ...@@ -70,9 +70,9 @@
*/ */
#ifdef MATCH_LOWER #ifdef MATCH_LOWER
#define TCHAR(t) ((char) tolower((unsigned char) (t))) #define GETCHAR(t) ((char) tolower((unsigned char) (t)))
#else #else
#define TCHAR(t) (t) #define GETCHAR(t) (t)
#endif #endif
static int static int
...@@ -101,76 +101,72 @@ MatchText(char *t, int tlen, char *p, int plen) ...@@ -101,76 +101,72 @@ MatchText(char *t, int tlen, char *p, int plen)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern must not end with escape character"))); errmsg("LIKE pattern must not end with escape character")));
if (TCHAR (*p) != TCHAR (*t)) if (GETCHAR(*p) != GETCHAR(*t))
return LIKE_FALSE; return LIKE_FALSE;
} }
else if (*p == '%') else if (*p == '%')
{ {
char firstpat;
/* /*
* % processing is essentially a search for a match for what * % processing is essentially a search for a text position at
* follows the %, plus a recursive match of the remainder. We * which the remainder of the text matches the remainder of the
* succeed if and only if both conditions are met. * pattern, using a recursive call to check each potential match.
*
* If there are wildcards immediately following the %, we can skip
* over them first, using the idea that any sequence of N _'s and
* one or more %'s is equivalent to N _'s and one % (ie, it will
* match any sequence of at least N text characters). In this
* way we will always run the recursive search loop using a
* pattern fragment that begins with a literal character-to-match,
* thereby not recursing more than we have to.
*/ */
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
while (plen > 0 && *p == '%')
NextByte(p, plen); NextByte(p, plen);
/* Trailing percent matches everything. */
if (plen <= 0)
return LIKE_TRUE;
/* while (plen > 0)
* Otherwise, scan for a text position at which we can match the
* rest of the pattern.
*/
if (*p == '_')
{ {
/* %_ is the same as _% - avoid matching _ repeatedly */ if (*p == '%')
NextByte(p, plen);
do else if (*p == '_')
{ {
/* If not enough text left to match the pattern, ABORT */
if (tlen <= 0)
return LIKE_ABORT;
NextChar(t, tlen); NextChar(t, tlen);
NextByte(p, plen); NextByte(p, plen);
} while (tlen > 0 && plen > 0 && *p == '_'); }
else
break; /* Reached a non-wildcard pattern char */
}
/* /*
* If we are at the end of the pattern, succeed: % followed by * If we're at end of pattern, match: we have a trailing % which
* n _'s matches any string of at least n characters, and we * matches any remaining text string.
* have now found there are at least n characters.
*/ */
if (plen <= 0) if (plen <= 0)
return LIKE_TRUE; return LIKE_TRUE;
/* Look for a place that matches the rest of the pattern */ /*
while (tlen > 0) * Otherwise, scan for a text position at which we can match the
{ * rest of the pattern. The first remaining pattern char is known
int matched = MatchText(t, tlen, p, plen); * to be a regular or escaped literal character, so we can compare
* the first pattern byte to each text byte to avoid recursing
if (matched != LIKE_FALSE) * more than we have to. This fact also guarantees that we don't
return matched; /* TRUE or ABORT */ * have to consider a match to the zero-length substring at the
* end of the text.
NextChar(t, tlen); */
}
}
else
{
char firstpat = TCHAR (*p);
if (*p == '\\') if (*p == '\\')
{ {
if (plen < 2) if (plen < 2)
return LIKE_FALSE; return LIKE_FALSE; /* XXX should throw error */
firstpat = TCHAR (p[1]); firstpat = GETCHAR(p[1]);
} }
else
firstpat = GETCHAR(*p);
while (tlen > 0) while (tlen > 0)
{ {
/* if (GETCHAR(*t) == firstpat)
* Optimization to prevent most recursion: don't recurse
* unless first pattern byte matches first text byte.
*/
if (TCHAR (*t) == firstpat)
{ {
int matched = MatchText(t, tlen, p, plen); int matched = MatchText(t, tlen, p, plen);
...@@ -180,7 +176,6 @@ MatchText(char *t, int tlen, char *p, int plen) ...@@ -180,7 +176,6 @@ MatchText(char *t, int tlen, char *p, int plen)
NextChar(t, tlen); NextChar(t, tlen);
} }
}
/* /*
* End of text with no match, so no point in trying later places * End of text with no match, so no point in trying later places
...@@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen) ...@@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen)
NextByte(p, plen); NextByte(p, plen);
continue; continue;
} }
else if (TCHAR (*p) != TCHAR (*t)) else if (GETCHAR(*p) != GETCHAR(*t))
{ {
/* non-wildcard pattern char fails to match text char */ /* non-wildcard pattern char fails to match text char */
return LIKE_FALSE; return LIKE_FALSE;
...@@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen) ...@@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen)
if (tlen > 0) if (tlen > 0)
return LIKE_FALSE; /* end of pattern, but not of text */ return LIKE_FALSE; /* end of pattern, but not of text */
/* End of text string. Do we have matching pattern remaining? */ /*
while (plen > 0 && *p == '%') /* allow multiple %'s at end of * End of text, but perhaps not of pattern. Match iff the remaining
* pattern */ * pattern can match a zero-length string, ie, it's zero or more %'s.
*/
while (plen > 0 && *p == '%')
NextByte(p, plen); NextByte(p, plen);
if (plen <= 0) if (plen <= 0)
return LIKE_TRUE; return LIKE_TRUE;
...@@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc) ...@@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc)
#undef do_like_escape #undef do_like_escape
#endif #endif
#undef TCHAR #undef GETCHAR
#ifdef MATCH_LOWER #ifdef MATCH_LOWER
#undef MATCH_LOWER #undef MATCH_LOWER
......
...@@ -996,7 +996,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false"; ...@@ -996,7 +996,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
(1 row) (1 row)
-- --
-- test %/_ combination cases, cf bug #4821 -- test %/_ combination cases, cf bugs #4821 and #5478
-- --
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f; SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
t | t | f t | t | f
...@@ -1022,6 +1022,12 @@ SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f; ...@@ -1022,6 +1022,12 @@ SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
t | t | f t | t | f
(1 row) (1 row)
SELECT 'jack' LIKE '%____%' AS t;
t
---
t
(1 row)
-- --
-- test implicit type conversion -- test implicit type conversion
-- --
......
...@@ -301,7 +301,7 @@ SELECT 'Hawkeye' ILIKE 'h%' AS "true"; ...@@ -301,7 +301,7 @@ SELECT 'Hawkeye' ILIKE 'h%' AS "true";
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false"; SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
-- --
-- test %/_ combination cases, cf bug #4821 -- test %/_ combination cases, cf bugs #4821 and #5478
-- --
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f; SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
...@@ -310,6 +310,8 @@ SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f; ...@@ -310,6 +310,8 @@ SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f; SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f; SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
SELECT 'jack' LIKE '%____%' AS t;
-- --
-- test implicit type conversion -- test implicit type conversion
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment