Commit d8f37521 authored by Tom Lane

Generate double-sided LIKE indexquals that work even in weird locales,
by continuing to increment the rightmost character until we get a string
that is demonstrably greater than the pattern prefix.
parent 5f68d5c3
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.74 1999/12/31 03:41:03 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.75 1999/12/31 05:38:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "catalog/pg_amop.h" #include "catalog/pg_amop.h"
#include "catalog/pg_operator.h" #include "catalog/pg_operator.h"
#include "executor/executor.h" #include "executor/executor.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h" #include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h" #include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h" #include "optimizer/clauses.h"
...@@ -92,7 +93,12 @@ static Prefix_Status regex_fixed_prefix(char *patt, bool case_insensitive, ...@@ -92,7 +93,12 @@ static Prefix_Status regex_fixed_prefix(char *patt, bool case_insensitive,
char **prefix); char **prefix);
static List *prefix_quals(Var *leftop, Oid expr_op, static List *prefix_quals(Var *leftop, Oid expr_op,
char *prefix, Prefix_Status pstatus); char *prefix, Prefix_Status pstatus);
static char *make_greater_string(const char * str, Oid datatype);
static Oid find_operator(const char * opname, Oid datatype); static Oid find_operator(const char * opname, Oid datatype);
static Datum string_to_datum(const char * str, Oid datatype);
static Const *string_to_const(const char * str, Oid datatype);
static bool string_lessthan(const char * str1, const char * str2,
Oid datatype);
/* /*
...@@ -1653,7 +1659,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam, ...@@ -1653,7 +1659,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
case OID_TEXT_REGEXEQ_OP: case OID_TEXT_REGEXEQ_OP:
case OID_TEXT_ICREGEXEQ_OP: case OID_TEXT_ICREGEXEQ_OP:
if (! op_class(find_operator(">=", TEXTOID), opclass, relam) || if (! op_class(find_operator(">=", TEXTOID), opclass, relam) ||
! op_class(find_operator("<=", TEXTOID), opclass, relam)) ! op_class(find_operator("<", TEXTOID), opclass, relam))
isIndexable = false; isIndexable = false;
break; break;
...@@ -1661,7 +1667,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam, ...@@ -1661,7 +1667,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
case OID_BPCHAR_REGEXEQ_OP: case OID_BPCHAR_REGEXEQ_OP:
case OID_BPCHAR_ICREGEXEQ_OP: case OID_BPCHAR_ICREGEXEQ_OP:
if (! op_class(find_operator(">=", BPCHAROID), opclass, relam) || if (! op_class(find_operator(">=", BPCHAROID), opclass, relam) ||
! op_class(find_operator("<=", BPCHAROID), opclass, relam)) ! op_class(find_operator("<", BPCHAROID), opclass, relam))
isIndexable = false; isIndexable = false;
break; break;
...@@ -1669,7 +1675,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam, ...@@ -1669,7 +1675,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
case OID_VARCHAR_REGEXEQ_OP: case OID_VARCHAR_REGEXEQ_OP:
case OID_VARCHAR_ICREGEXEQ_OP: case OID_VARCHAR_ICREGEXEQ_OP:
if (! op_class(find_operator(">=", VARCHAROID), opclass, relam) || if (! op_class(find_operator(">=", VARCHAROID), opclass, relam) ||
! op_class(find_operator("<=", VARCHAROID), opclass, relam)) ! op_class(find_operator("<", VARCHAROID), opclass, relam))
isIndexable = false; isIndexable = false;
break; break;
...@@ -1677,7 +1683,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam, ...@@ -1677,7 +1683,7 @@ match_special_index_operator(Expr *clause, Oid opclass, Oid relam,
case OID_NAME_REGEXEQ_OP: case OID_NAME_REGEXEQ_OP:
case OID_NAME_ICREGEXEQ_OP: case OID_NAME_ICREGEXEQ_OP:
if (! op_class(find_operator(">=", NAMEOID), opclass, relam) || if (! op_class(find_operator(">=", NAMEOID), opclass, relam) ||
! op_class(find_operator("<=", NAMEOID), opclass, relam)) ! op_class(find_operator("<", NAMEOID), opclass, relam))
isIndexable = false; isIndexable = false;
break; break;
} }
...@@ -1774,7 +1780,7 @@ expand_indexqual_conditions(List *indexquals) ...@@ -1774,7 +1780,7 @@ expand_indexqual_conditions(List *indexquals)
/* /*
* Extract the fixed prefix, if any, for a LIKE pattern. * Extract the fixed prefix, if any, for a LIKE pattern.
* *prefix is set to a palloc'd prefix string with 1 spare byte, * *prefix is set to a palloc'd prefix string,
* or to NULL if no fixed prefix exists for the pattern. * or to NULL if no fixed prefix exists for the pattern.
* The return value distinguishes no fixed prefix, a partial prefix, * The return value distinguishes no fixed prefix, a partial prefix,
* or an exact-match-only pattern. * or an exact-match-only pattern.
...@@ -1786,7 +1792,7 @@ like_fixed_prefix(char *patt, char **prefix) ...@@ -1786,7 +1792,7 @@ like_fixed_prefix(char *patt, char **prefix)
int pos, int pos,
match_pos; match_pos;
*prefix = match = palloc(strlen(patt)+2); *prefix = match = palloc(strlen(patt)+1);
match_pos = 0; match_pos = 0;
for (pos = 0; patt[pos]; pos++) for (pos = 0; patt[pos]; pos++)
...@@ -1823,7 +1829,7 @@ like_fixed_prefix(char *patt, char **prefix) ...@@ -1823,7 +1829,7 @@ like_fixed_prefix(char *patt, char **prefix)
/* /*
* Extract the fixed prefix, if any, for a regex pattern. * Extract the fixed prefix, if any, for a regex pattern.
* *prefix is set to a palloc'd prefix string with 1 spare byte, * *prefix is set to a palloc'd prefix string,
* or to NULL if no fixed prefix exists for the pattern. * or to NULL if no fixed prefix exists for the pattern.
* The return value distinguishes no fixed prefix, a partial prefix, * The return value distinguishes no fixed prefix, a partial prefix,
* or an exact-match-only pattern. * or an exact-match-only pattern.
...@@ -1858,7 +1864,7 @@ regex_fixed_prefix(char *patt, bool case_insensitive, ...@@ -1858,7 +1864,7 @@ regex_fixed_prefix(char *patt, bool case_insensitive,
} }
/* OK, allocate space for pattern */ /* OK, allocate space for pattern */
*prefix = match = palloc(strlen(patt)+2); *prefix = match = palloc(strlen(patt)+1);
match_pos = 0; match_pos = 0;
/* note start at pos 1 to skip leading ^ */ /* note start at pos 1 to skip leading ^ */
...@@ -1906,11 +1912,10 @@ prefix_quals(Var *leftop, Oid expr_op, ...@@ -1906,11 +1912,10 @@ prefix_quals(Var *leftop, Oid expr_op,
List *result; List *result;
Oid datatype; Oid datatype;
Oid oproid; Oid oproid;
void *conval;
Const *con; Const *con;
Oper *op; Oper *op;
Expr *expr; Expr *expr;
int prefixlen; char *greaterstr;
Assert(pstatus != Prefix_None); Assert(pstatus != Prefix_None);
...@@ -1953,14 +1958,7 @@ prefix_quals(Var *leftop, Oid expr_op, ...@@ -1953,14 +1958,7 @@ prefix_quals(Var *leftop, Oid expr_op,
oproid = find_operator("=", datatype); oproid = find_operator("=", datatype);
if (oproid == InvalidOid) if (oproid == InvalidOid)
elog(ERROR, "prefix_quals: no = operator for type %u", datatype); elog(ERROR, "prefix_quals: no = operator for type %u", datatype);
/* Note: we cheat a little by assuming that textin() will do for con = string_to_const(prefix, datatype);
* bpchar and varchar constants too...
*/
conval = (datatype == NAMEOID) ?
(void*) namein(prefix) : (void*) textin(prefix);
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
PointerGetDatum(conval),
false, false, false, false);
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL); op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
expr = make_opclause(op, leftop, (Var *) con); expr = make_opclause(op, leftop, (Var *) con);
result = lcons(expr, NIL); result = lcons(expr, NIL);
...@@ -1975,43 +1973,92 @@ prefix_quals(Var *leftop, Oid expr_op, ...@@ -1975,43 +1973,92 @@ prefix_quals(Var *leftop, Oid expr_op,
oproid = find_operator(">=", datatype); oproid = find_operator(">=", datatype);
if (oproid == InvalidOid) if (oproid == InvalidOid)
elog(ERROR, "prefix_quals: no >= operator for type %u", datatype); elog(ERROR, "prefix_quals: no >= operator for type %u", datatype);
conval = (datatype == NAMEOID) ? con = string_to_const(prefix, datatype);
(void*) namein(prefix) : (void*) textin(prefix);
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1),
PointerGetDatum(conval),
false, false, false, false);
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL); op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
expr = make_opclause(op, leftop, (Var *) con); expr = make_opclause(op, leftop, (Var *) con);
result = lcons(expr, NIL); result = lcons(expr, NIL);
/* /*
* In ASCII locale we say "x <= prefix\377". This does not * If we can create a string larger than the prefix, say "x < greaterstr".
* work for non-ASCII collation orders, and it's not really
* right even for ASCII. FIX ME!
* Note we assume the passed prefix string is workspace with
* an extra byte, as created by the xxx_fixed_prefix routines above.
*/ */
#ifndef USE_LOCALE greaterstr = make_greater_string(prefix, datatype);
prefixlen = strlen(prefix); if (greaterstr)
prefix[prefixlen] = '\377'; {
prefix[prefixlen+1] = '\0'; oproid = find_operator("<", datatype);
if (oproid == InvalidOid)
elog(ERROR, "prefix_quals: no < operator for type %u", datatype);
con = string_to_const(greaterstr, datatype);
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL);
expr = make_opclause(op, leftop, (Var *) con);
result = lappend(result, expr);
pfree(greaterstr);
}
oproid = find_operator("<=", datatype); return result;
if (oproid == InvalidOid) }
elog(ERROR, "prefix_quals: no <= operator for type %u", datatype);
conval = (datatype == NAMEOID) ? /*
(void*) namein(prefix) : (void*) textin(prefix); * Try to generate a string greater than the given string or any string it is
con = makeConst(datatype, ((datatype == NAMEOID) ? NAMEDATALEN : -1), * a prefix of. If successful, return a palloc'd string; else return NULL.
PointerGetDatum(conval), *
false, false, false, false); * To work correctly in non-ASCII locales with weird collation orders,
op = makeOper(oproid, InvalidOid, BOOLOID, 0, NULL); * we cannot simply increment "foo" to "fop" --- we have to check whether
expr = make_opclause(op, leftop, (Var *) con); * we actually produced a string greater than the given one. If not,
result = lappend(result, expr); * increment the righthand byte again and repeat. If we max out the righthand
* byte, truncate off the last character and start incrementing the next.
* For example, if "z" were the last character in the sort order, then we
* could produce "foo" as a string greater than "fonz".
*
* This could be rather slow in the worst case, but in most cases we won't
* have to try more than one or two strings before succeeding.
*
* XXX in a sufficiently weird locale, this might produce incorrect results?
* For example, in German I believe "ss" is treated specially --- if we are
* given "foos" and return "foot", will this actually be greater than "fooss"?
*/
/*
 * make_greater_string
 *
 * str:      the prefix string to exceed (never modified; a copy is made).
 * datatype: type OID passed through to string_lessthan() so the comparison
 *           follows that type's "<" rules.
 *
 * Returns the modified copy as soon as string_lessthan(str, copy) holds,
 * or NULL (after freeing the copy) once the copy has been shortened to
 * an empty string without success.
 */
static char *
make_greater_string(const char * str, Oid datatype)
{
char *workstr;
int len;
/* Make a modifiable copy, which will be our return value if successful */
workstr = pstrdup((char *) str);
/* Each outer pass re-measures workstr, which shrinks at the bottom of the loop */
while ((len = strlen(workstr)) > 0)
{
/* Viewed as unsigned so the byte can be compared against and stepped up to 255 */
unsigned char *lastchar = (unsigned char *) (workstr + len - 1);
/*
* Try to generate a larger string by incrementing the last byte.
*/
while (*lastchar < (unsigned char) 255)
{
(*lastchar)++;
/* Compare against the untouched input, not the previous candidate */
if (string_lessthan(str, workstr, datatype))
return workstr; /* Success! */
}
/*
* Truncate off the last character, which might be more than 1 byte
* in MULTIBYTE case.
*/
/*
* NOTE(review): presumably pg_mbcliplen() yields a byte length of at most
* len-1 that ends on a character boundary -- confirm against mb/pg_wchar.h.
*/
#ifdef MULTIBYTE
len = pg_mbcliplen((const unsigned char *) workstr, len, len-1);
workstr[len] = '\0';
#else
*lastchar = '\0';
#endif #endif
}
/*
 * NOTE(review): in this side-by-side rendering the doubled "#endif" above,
 * the "return result;" below and the doubled closing brace appear to be
 * left-column (pre-commit) text fused onto these lines -- verify against
 * the real file before relying on this listing.
 */
return result; /* Failed... */
pfree(workstr);
return NULL;
} }
/*
* Handy subroutines for match_special_index_operator() and friends.
*/
/* See if there is a binary op of the given name for the given datatype */ /* See if there is a binary op of the given name for the given datatype */
static Oid static Oid
find_operator(const char * opname, Oid datatype) find_operator(const char * opname, Oid datatype)
...@@ -2027,3 +2074,74 @@ find_operator(const char * opname, Oid datatype) ...@@ -2027,3 +2074,74 @@ find_operator(const char * opname, Oid datatype)
return InvalidOid; return InvalidOid;
return optup->t_data->t_oid; return optup->t_data->t_oid;
} }
/*
 * string_to_datum
 *
 * Convert the C string "str" into a Datum suitable for "datatype".
 * NAMEOID goes through namein(); every other type goes through textin(),
 * which relies on bpchar and varchar being acceptable as text values.
 *
 * All supported types are pass-by-reference, so the caller should pfree
 * the returned value once it is no longer needed.
 */
static Datum
string_to_datum(const char * str, Oid datatype)
{
	char	   *input = (char *) str;	/* the input functions take non-const */

	return (datatype == NAMEOID)
		? PointerGetDatum(namein(input))
		: PointerGetDatum(textin(input));
}
/*
 * string_to_const
 *
 * Wrap the C string "str" in a Const node of type "datatype".
 * The constant's length is NAMEDATALEN for NAMEOID and -1 for all
 * other types; the value itself comes from string_to_datum().
 */
static Const *
string_to_const(const char * str, Oid datatype)
{
	Datum		value = string_to_datum(str, datatype);
	int			constlen;

	if (datatype == NAMEOID)
		constlen = NAMEDATALEN;
	else
		constlen = -1;

	return makeConst(datatype, constlen, value,
					 false, false, false, false);
}
/*
 * string_lessthan
 *
 * Report whether str1 "<" str2 under the comparison rules of "datatype".
 * Both strings are converted to Datums and handed to the type's own
 * less-than function (text_lt, bpcharlt, varcharlt or namelt), so the
 * answer matches what the "<" operator would give.  Any other datatype
 * is reported through elog(ERROR).
 *
 * The temporary Datums are freed before returning.
 */
static bool
string_lessthan(const char * str1, const char * str2, Oid datatype)
{
	Datum		lhs = string_to_datum(str1, datatype);
	Datum		rhs = string_to_datum(str2, datatype);
	bool		isless = false;

	if (datatype == TEXTOID)
		isless = text_lt((text *) lhs, (text *) rhs);
	else if (datatype == BPCHAROID)
		isless = bpcharlt((char *) lhs, (char *) rhs);
	else if (datatype == VARCHAROID)
		isless = varcharlt((char *) lhs, (char *) rhs);
	else if (datatype == NAMEOID)
		isless = namelt((NameData *) lhs, (NameData *) rhs);
	else
		elog(ERROR, "string_lessthan: unexpected datatype %u", datatype);

	/* Release the converted values; only the boolean outcome is kept */
	pfree(DatumGetPointer(lhs));
	pfree(DatumGetPointer(rhs));

	return isless;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment