Commit 00f11f41 authored by Tom Lane

Fix ILIKE to honor collation when working in single-byte encodings.

The original collation patch only fixed the multi-byte code path.
This change also ensures that ILIKE's idea of the case-folding rules
is exactly the same as str_tolower's.
parent f89e4dfa
...@@ -19,8 +19,10 @@ ...@@ -19,8 +19,10 @@
#include <ctype.h> #include <ctype.h>
#include "catalog/pg_collation.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/pg_locale.h"
#define LIKE_TRUE 1 #define LIKE_TRUE 1
...@@ -28,15 +30,19 @@ ...@@ -28,15 +30,19 @@
#define LIKE_ABORT (-1) #define LIKE_ABORT (-1)
static int SB_MatchText(char *t, int tlen, char *p, int plen); static int SB_MatchText(char *t, int tlen, char *p, int plen,
pg_locale_t locale, bool locale_is_c);
static text *SB_do_like_escape(text *, text *); static text *SB_do_like_escape(text *, text *);
static int MB_MatchText(char *t, int tlen, char *p, int plen); static int MB_MatchText(char *t, int tlen, char *p, int plen,
pg_locale_t locale, bool locale_is_c);
static text *MB_do_like_escape(text *, text *); static text *MB_do_like_escape(text *, text *);
static int UTF8_MatchText(char *t, int tlen, char *p, int plen); static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
pg_locale_t locale, bool locale_is_c);
static int SB_IMatchText(char *t, int tlen, char *p, int plen); static int SB_IMatchText(char *t, int tlen, char *p, int plen,
pg_locale_t locale, bool locale_is_c);
static int GenericMatchText(char *s, int slen, char *p, int plen); static int GenericMatchText(char *s, int slen, char *p, int plen);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation); static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
...@@ -78,6 +84,24 @@ wchareq(char *p1, char *p2) ...@@ -78,6 +84,24 @@ wchareq(char *p1, char *p2)
* comparison. This should be revisited when we install better locale support. * comparison. This should be revisited when we install better locale support.
*/ */
/*
* We do handle case-insensitive matching for single-byte encodings using
* fold-on-the-fly processing, however.
*/
/*
 * SB_lower_char
 *		Fold a single byte to lower case for case-insensitive matching.
 *
 * c			the byte to fold
 * locale		locale handle to fold with; only consulted when the build
 *				defines HAVE_LOCALE_T and the handle is nonzero
 * locale_is_c	when true, fold with pg_ascii_tolower() and ignore "locale"
 *
 * Returns the folded byte.  The three cases are tried in order: the
 * locale_is_c case first, then an explicit locale handle (if supported by
 * the build), and finally pg_tolower() as the fallback.
 */
static char
SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
{
	/* Caller has determined that the C locale applies */
	if (locale_is_c)
		return pg_ascii_tolower(c);

#ifdef HAVE_LOCALE_T
	/* A nonzero locale handle was supplied, so fold according to it */
	if (locale)
		return tolower_l(c, locale);
#endif

	/* No usable locale handle: fall back to pg_tolower() */
	return pg_tolower(c);
}
#define NextByte(p, plen) ((p)++, (plen)--) #define NextByte(p, plen) ((p)++, (plen)--)
/* Set up to compile like_match.c for multibyte characters */ /* Set up to compile like_match.c for multibyte characters */
...@@ -107,7 +131,7 @@ wchareq(char *p1, char *p2) ...@@ -107,7 +131,7 @@ wchareq(char *p1, char *p2)
#include "like_match.c" #include "like_match.c"
/* setup to compile like_match.c for single byte case insensitive matches */ /* setup to compile like_match.c for single byte case insensitive matches */
#define MATCH_LOWER #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
#define NextChar(p, plen) NextByte((p), (plen)) #define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText #define MatchText SB_IMatchText
...@@ -121,15 +145,16 @@ wchareq(char *p1, char *p2) ...@@ -121,15 +145,16 @@ wchareq(char *p1, char *p2)
#include "like_match.c" #include "like_match.c"
/* Generic for all cases not requiring inline case-folding */
static inline int static inline int
GenericMatchText(char *s, int slen, char *p, int plen) GenericMatchText(char *s, int slen, char *p, int plen)
{ {
if (pg_database_encoding_max_length() == 1) if (pg_database_encoding_max_length() == 1)
return SB_MatchText(s, slen, p, plen); return SB_MatchText(s, slen, p, plen, 0, true);
else if (GetDatabaseEncoding() == PG_UTF8) else if (GetDatabaseEncoding() == PG_UTF8)
return UTF8_MatchText(s, slen, p, plen); return UTF8_MatchText(s, slen, p, plen, 0, true);
else else
return MB_MatchText(s, slen, p, plen); return MB_MatchText(s, slen, p, plen, 0, true);
} }
static inline int static inline int
...@@ -142,8 +167,8 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) ...@@ -142,8 +167,8 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
/* /*
* For efficiency reasons, in the single byte case we don't call lower() * For efficiency reasons, in the single byte case we don't call lower()
* on the pattern and text, but instead call to_lower on each character. * on the pattern and text, but instead call SB_lower_char on each
* In the multi-byte case we don't have much choice :-( * character. In the multi-byte case we don't have much choice :-(
*/ */
if (pg_database_encoding_max_length() > 1) if (pg_database_encoding_max_length() > 1)
...@@ -156,17 +181,42 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) ...@@ -156,17 +181,42 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
s = VARDATA(str); s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ); slen = (VARSIZE(str) - VARHDRSZ);
if (GetDatabaseEncoding() == PG_UTF8) if (GetDatabaseEncoding() == PG_UTF8)
return UTF8_MatchText(s, slen, p, plen); return UTF8_MatchText(s, slen, p, plen, 0, true);
else else
return MB_MatchText(s, slen, p, plen); return MB_MatchText(s, slen, p, plen, 0, true);
} }
else else
{ {
/*
* Here we need to prepare locale information for SB_lower_char.
* This should match the methods used in str_tolower().
*/
pg_locale_t locale = 0;
bool locale_is_c = false;
if (lc_ctype_is_c(collation))
locale_is_c = true;
else if (collation != DEFAULT_COLLATION_OID)
{
if (!OidIsValid(collation))
{
/*
* This typically means that the parser could not resolve a
* conflict of implicit collations, so report it that way.
*/
ereport(ERROR,
(errcode(ERRCODE_INDETERMINATE_COLLATION),
errmsg("could not determine which collation to use for ILIKE"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
locale = pg_newlocale_from_collation(collation);
}
p = VARDATA_ANY(pat); p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat); plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str); s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str); slen = VARSIZE_ANY_EXHDR(str);
return SB_IMatchText(s, slen, p, plen); return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
} }
} }
...@@ -274,7 +324,7 @@ bytealike(PG_FUNCTION_ARGS) ...@@ -274,7 +324,7 @@ bytealike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat); p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat); plen = VARSIZE_ANY_EXHDR(pat);
result = (SB_MatchText(s, slen, p, plen) == LIKE_TRUE); result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
PG_RETURN_BOOL(result); PG_RETURN_BOOL(result);
} }
...@@ -295,7 +345,7 @@ byteanlike(PG_FUNCTION_ARGS) ...@@ -295,7 +345,7 @@ byteanlike(PG_FUNCTION_ARGS)
p = VARDATA_ANY(pat); p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat); plen = VARSIZE_ANY_EXHDR(pat);
result = (SB_MatchText(s, slen, p, plen) != LIKE_TRUE); result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
PG_RETURN_BOOL(result); PG_RETURN_BOOL(result);
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* This file is included by like.c four times, to provide matching code for * This file is included by like.c four times, to provide matching code for
* (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings, * (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
* and (4) case insensitive matches in single byte encodings. * and (4) case insensitive matches in single-byte encodings.
* (UTF8 is a special case because we can use a much more efficient version * (UTF8 is a special case because we can use a much more efficient version
* of NextChar than can be used for general multi-byte encodings.) * of NextChar than can be used for general multi-byte encodings.)
* *
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* NextChar * NextChar
* MatchText - to name of function wanted * MatchText - to name of function wanted
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
* MATCH_LOWER - define for case (4), using to_lower on single-byte chars * MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars
* *
* Copyright (c) 1996-2011, PostgreSQL Global Development Group * Copyright (c) 1996-2011, PostgreSQL Global Development Group
* *
...@@ -70,13 +70,14 @@ ...@@ -70,13 +70,14 @@
*/ */
#ifdef MATCH_LOWER #ifdef MATCH_LOWER
#define GETCHAR(t) ((char) tolower((unsigned char) (t))) #define GETCHAR(t) MATCH_LOWER(t)
#else #else
#define GETCHAR(t) (t) #define GETCHAR(t) (t)
#endif #endif
static int static int
MatchText(char *t, int tlen, char *p, int plen) MatchText(char *t, int tlen, char *p, int plen,
pg_locale_t locale, bool locale_is_c)
{ {
/* Fast path for match-everything pattern */ /* Fast path for match-everything pattern */
if (plen == 1 && *p == '%') if (plen == 1 && *p == '%')
...@@ -170,7 +171,8 @@ MatchText(char *t, int tlen, char *p, int plen) ...@@ -170,7 +171,8 @@ MatchText(char *t, int tlen, char *p, int plen)
{ {
if (GETCHAR(*t) == firstpat) if (GETCHAR(*t) == firstpat)
{ {
int matched = MatchText(t, tlen, p, plen); int matched = MatchText(t, tlen, p, plen,
locale, locale_is_c);
if (matched != LIKE_FALSE) if (matched != LIKE_FALSE)
return matched; /* TRUE or ABORT */ return matched; /* TRUE or ABORT */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment