Commit 2de946be authored by Tom Lane

Improve the performance of LIKE/regex estimation in non-C locales, by making
make_greater_string() try harder to generate a string that's actually greater
than its input string.  Before we just assumed that making a string that was
memcmp-greater was enough, but it is easy to generate examples where this is
not so when the locale is not C.  Instead, loop until the relevant comparison
function agrees that the generated string is greater than the input.

Unfortunately this is probably not enough to guarantee that the generated
string is greater than all extensions of the input, so we cannot relax the
restriction to C locale for the LIKE/regex index optimization.  But it should
at least improve the odds of getting a useful selectivity estimate in
prefix_selectivity().  Per example from Guillaume Smet.

Backpatch to 8.1, mainly because that's what the complainant is using...
parent 95422871
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.222 2007/05/22 01:40:33 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.223 2007/11/07 22:37:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -2668,6 +2668,7 @@ prefix_quals(Node *leftop, Oid opfamily, ...@@ -2668,6 +2668,7 @@ prefix_quals(Node *leftop, Oid opfamily,
Oid datatype; Oid datatype;
Oid oproid; Oid oproid;
Expr *expr; Expr *expr;
FmgrInfo ltproc;
Const *greaterstr; Const *greaterstr;
Assert(pstatus != Pattern_Prefix_None); Assert(pstatus != Pattern_Prefix_None);
...@@ -2759,13 +2760,14 @@ prefix_quals(Node *leftop, Oid opfamily, ...@@ -2759,13 +2760,14 @@ prefix_quals(Node *leftop, Oid opfamily,
* "x < greaterstr". * "x < greaterstr".
*------- *-------
*/ */
greaterstr = make_greater_string(prefix_const); oproid = get_opfamily_member(opfamily, datatype, datatype,
BTLessStrategyNumber);
if (oproid == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
fmgr_info(get_opcode(oproid), &ltproc);
greaterstr = make_greater_string(prefix_const, &ltproc);
if (greaterstr) if (greaterstr)
{ {
oproid = get_opfamily_member(opfamily, datatype, datatype,
BTLessStrategyNumber);
if (oproid == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
expr = make_opclause(oproid, BOOLOID, false, expr = make_opclause(oproid, BOOLOID, false,
(Expr *) leftop, (Expr *) greaterstr); (Expr *) leftop, (Expr *) greaterstr);
result = lappend(result, result = lappend(result,
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.237 2007/11/07 21:00:37 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.238 2007/11/07 22:37:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -4302,17 +4302,17 @@ prefix_selectivity(VariableStatData *vardata, ...@@ -4302,17 +4302,17 @@ prefix_selectivity(VariableStatData *vardata,
* "x < greaterstr". * "x < greaterstr".
*------- *-------
*/ */
greaterstrcon = make_greater_string(prefixcon); cmpopr = get_opfamily_member(opfamily, vartype, vartype,
BTLessStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
greaterstrcon = make_greater_string(prefixcon, &opproc);
if (greaterstrcon) if (greaterstrcon)
{ {
Selectivity topsel; Selectivity topsel;
cmpopr = get_opfamily_member(opfamily, vartype, vartype,
BTLessStrategyNumber);
if (cmpopr == InvalidOid)
elog(ERROR, "no < operator for opfamily %u", opfamily);
fmgr_info(get_opcode(cmpopr), &opproc);
topsel = ineq_histogram_selectivity(vardata, &opproc, false, topsel = ineq_histogram_selectivity(vardata, &opproc, false,
greaterstrcon->constvalue, greaterstrcon->constvalue,
greaterstrcon->consttype); greaterstrcon->consttype);
...@@ -4589,8 +4589,17 @@ pattern_selectivity(Const *patt, Pattern_Type ptype) ...@@ -4589,8 +4589,17 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
* in the form of a Const pointer; else return NULL. * in the form of a Const pointer; else return NULL.
* *
* The key requirement here is that given a prefix string, say "foo", * The key requirement here is that given a prefix string, say "foo",
* we must be able to generate another string "fop" that is greater * we must be able to generate another string "fop" that is greater than
* than all strings "foobar" starting with "foo". * all strings "foobar" starting with "foo". We can test that we have
* generated a string greater than the prefix string, but in non-C locales
* that is not a bulletproof guarantee that an extension of the string might
* not sort after it; an example is that "foo " is less than "foo!", but it
* is not clear that a "dictionary" sort ordering will consider "foo!" less
* than "foo bar". Therefore, this function should be used only for
* estimation purposes when working in a non-C locale.
*
* The caller must provide the appropriate "less than" comparison function
* for testing the strings.
* *
* If we max out the righthand byte, truncate off the last character * If we max out the righthand byte, truncate off the last character
* and start incrementing the next. For example, if "z" were the last * and start incrementing the next. For example, if "z" were the last
...@@ -4599,20 +4608,15 @@ pattern_selectivity(Const *patt, Pattern_Type ptype) ...@@ -4599,20 +4608,15 @@ pattern_selectivity(Const *patt, Pattern_Type ptype)
* *
* This could be rather slow in the worst case, but in most cases we * This could be rather slow in the worst case, but in most cases we
* won't have to try more than one or two strings before succeeding. * won't have to try more than one or two strings before succeeding.
*
* NOTE: at present this assumes we are in the C locale, so that simple
* bytewise comparison applies. However, we might be in a multibyte
* encoding such as UTF8, so we do have to watch out for generating
* invalid encoding sequences.
*/ */
Const * Const *
make_greater_string(const Const *str_const) make_greater_string(const Const *str_const, FmgrInfo *ltproc)
{ {
Oid datatype = str_const->consttype; Oid datatype = str_const->consttype;
char *workstr; char *workstr;
int len; int len;
/* Get the string and a modifiable copy */ /* Get a modifiable copy of the string in C-string format */
if (datatype == NAMEOID) if (datatype == NAMEOID)
{ {
workstr = DatumGetCString(DirectFunctionCall1(nameout, workstr = DatumGetCString(DirectFunctionCall1(nameout,
...@@ -4660,8 +4664,18 @@ make_greater_string(const Const *str_const) ...@@ -4660,8 +4664,18 @@ make_greater_string(const Const *str_const)
else else
workstr_const = string_to_bytea_const(workstr, len); workstr_const = string_to_bytea_const(workstr, len);
pfree(workstr); if (DatumGetBool(FunctionCall2(ltproc,
return workstr_const; str_const->constvalue,
workstr_const->constvalue)))
{
/* Successfully made a string larger than the input */
pfree(workstr);
return workstr_const;
}
/* No good, release unusable value and try again */
pfree(DatumGetPointer(workstr_const->constvalue));
pfree(workstr_const);
} }
/* restore last byte so we don't confuse pg_mbcliplen */ /* restore last byte so we don't confuse pg_mbcliplen */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.40 2007/08/31 23:35:22 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/selfuncs.h,v 1.41 2007/11/07 22:37:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -118,7 +118,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, ...@@ -118,7 +118,7 @@ extern Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
Pattern_Type ptype, Pattern_Type ptype,
Const **prefix, Const **prefix,
Const **rest); Const **rest);
extern Const *make_greater_string(const Const *str_const); extern Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc);
extern Datum eqsel(PG_FUNCTION_ARGS); extern Datum eqsel(PG_FUNCTION_ARGS);
extern Datum neqsel(PG_FUNCTION_ARGS); extern Datum neqsel(PG_FUNCTION_ARGS);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment