Commit b8c798eb authored by Tom Lane

Tweak tsmatchsel() so that it examines the structure of the tsquery whenever
possible (i.e., whenever the tsquery is a constant), even when no statistics
are available for the tsvector.  For example, foo @@ 'a & b'::tsquery
can be expected to be more selective than foo @@ 'a'::tsquery, whether
or not we know anything about foo.  We use DEFAULT_TS_MATCH_SEL as the assumed
selectivity of individual query terms when no stats are available, then
combine the terms according to the query's AND/OR structure as usual.

Per experimentation with Artur Dabrowski's example.  (The fact that there
are no stats available in that example is a problem in itself, but
nonetheless tsmatchsel should be smarter about the case.)

Back-patch to 8.4 to keep all versions of tsmatchsel() in sync.
parent 2ab57e08
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.7 2010/01/04 02:44:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.8 2010/07/31 03:27:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -52,6 +52,9 @@ static Selectivity tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -52,6 +52,9 @@ static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
TextFreq *lookup, int length, float4 minfreq); TextFreq *lookup, int length, float4 minfreq);
static int compare_lexeme_textfreq(const void *e1, const void *e2); static int compare_lexeme_textfreq(const void *e1, const void *e2);
#define tsquery_opr_selec_no_stats(query) \
tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), NULL, 0, 0)
/* /*
* tsmatchsel -- Selectivity of "@@" * tsmatchsel -- Selectivity of "@@"
...@@ -101,21 +104,20 @@ tsmatchsel(PG_FUNCTION_ARGS) ...@@ -101,21 +104,20 @@ tsmatchsel(PG_FUNCTION_ARGS)
} }
/* /*
* OK, there's a Var and a Const we're dealing with here. We need the Var * OK, there's a Var and a Const we're dealing with here. We need the
* to be a TSVector (or else we don't have any useful statistic for it). * Const to be a TSQuery, else we can't do anything useful. We have to
* We have to check this because the Var might be the TSQuery not the * check this because the Var might be the TSQuery not the TSVector.
* TSVector.
*/ */
if (vardata.vartype == TSVECTOROID) if (((Const *) other)->consttype == TSQUERYOID)
{ {
/* tsvector @@ tsquery or the other way around */ /* tsvector @@ tsquery or the other way around */
Assert(((Const *) other)->consttype == TSQUERYOID); Assert(vardata.vartype == TSVECTOROID);
selec = tsquerysel(&vardata, ((Const *) other)->constvalue); selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
} }
else else
{ {
/* The Var is something we don't have useful statistics for */ /* If we can't see the query structure, must punt */
selec = DEFAULT_TS_MATCH_SEL; selec = DEFAULT_TS_MATCH_SEL;
} }
...@@ -184,14 +186,14 @@ tsquerysel(VariableStatData *vardata, Datum constval) ...@@ -184,14 +186,14 @@ tsquerysel(VariableStatData *vardata, Datum constval)
} }
else else
{ {
/* No most-common-elements info, so we must punt */ /* No most-common-elements info, so do without */
selec = (Selectivity) DEFAULT_TS_MATCH_SEL; selec = tsquery_opr_selec_no_stats(query);
} }
} }
else else
{ {
/* No stats at all, so we must punt */ /* No stats at all, so do without */
selec = (Selectivity) DEFAULT_TS_MATCH_SEL; selec = tsquery_opr_selec_no_stats(query);
} }
return selec; return selec;
...@@ -214,7 +216,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, ...@@ -214,7 +216,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
* cells are taken for minimal and maximal frequency. Punt if not. * cells are taken for minimal and maximal frequency. Punt if not.
*/ */
if (nnumbers != nmcelem + 2) if (nnumbers != nmcelem + 2)
return DEFAULT_TS_MATCH_SEL; return tsquery_opr_selec_no_stats(query);
/* /*
* Transpose the data into a single array so we can use bsearch(). * Transpose the data into a single array so we can use bsearch().
...@@ -258,9 +260,12 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem, ...@@ -258,9 +260,12 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
* freq[val] in VAL nodes, if the value is in MCELEM * freq[val] in VAL nodes, if the value is in MCELEM
* min(freq[MCELEM]) / 2 in VAL nodes, if it is not * min(freq[MCELEM]) / 2 in VAL nodes, if it is not
* *
*
* The MCELEM array is already sorted (see ts_typanalyze.c), so we can use * The MCELEM array is already sorted (see ts_typanalyze.c), so we can use
* binary search for determining freq[MCELEM]. * binary search for determining freq[MCELEM].
*
* If we don't have stats for the tsvector, we still use this logic,
* except we always use DEFAULT_TS_MATCH_SEL for VAL nodes. This case
* is signaled by lookup == NULL.
*/ */
static Selectivity static Selectivity
tsquery_opr_selec(QueryItem *item, char *operand, tsquery_opr_selec(QueryItem *item, char *operand,
...@@ -279,6 +284,10 @@ tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -279,6 +284,10 @@ tsquery_opr_selec(QueryItem *item, char *operand,
{ {
QueryOperand *oper = (QueryOperand *) item; QueryOperand *oper = (QueryOperand *) item;
/* If no stats for the variable, use DEFAULT_TS_MATCH_SEL */
if (lookup == NULL)
return (Selectivity) DEFAULT_TS_MATCH_SEL;
/* /*
* Prepare the key for bsearch(). * Prepare the key for bsearch().
*/ */
...@@ -300,7 +309,7 @@ tsquery_opr_selec(QueryItem *item, char *operand, ...@@ -300,7 +309,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
else else
{ {
/* /*
* The element is not in MCELEM. Punt, but assert that the * The element is not in MCELEM. Punt, but assume that the
* selectivity cannot be more than minfreq / 2. * selectivity cannot be more than minfreq / 2.
*/ */
return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2); return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment