Commit 4e57668d authored by Tom Lane's avatar Tom Lane

Create a selectivity estimation function for the text search @@ operator.

Jan Urbanski
parent e2b7d0c6
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.174 2008/09/15 18:43:41 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.175 2008/09/19 19:03:40 tgl Exp $ -->
<!-- <!--
Documentation of the system catalogs, directed toward PostgreSQL developers Documentation of the system catalogs, directed toward PostgreSQL developers
--> -->
...@@ -6664,6 +6664,9 @@ ...@@ -6664,6 +6664,9 @@
A list of the frequencies of the most common values or elements, A list of the frequencies of the most common values or elements,
i.e., number of occurrences of each divided by total number of rows. i.e., number of occurrences of each divided by total number of rows.
(NULL when <structfield>most_common_vals</structfield> is.) (NULL when <structfield>most_common_vals</structfield> is.)
For some datatypes such as <type>tsvector</>, it can also store some
additional information, making it longer than the
<structfield>most_common_vals</> array.
</entry> </entry>
</row> </row>
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# #
# Copyright (c) 2006-2008, PostgreSQL Global Development Group # Copyright (c) 2006-2008, PostgreSQL Global Development Group
# #
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.7 2008/07/14 00:51:45 tgl Exp $ # $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.8 2008/09/19 19:03:40 tgl Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
subdir = src/backend/tsearch subdir = src/backend/tsearch
...@@ -19,7 +19,7 @@ DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \ ...@@ -19,7 +19,7 @@ DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \ OBJS = ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o \
dict_simple.o dict_synonym.o dict_thesaurus.o \ dict_simple.o dict_synonym.o dict_thesaurus.o \
dict_ispell.o regis.o spell.o \ dict_ispell.o regis.o spell.o \
to_tsany.o ts_typanalyze.o ts_utils.o to_tsany.o ts_selfuncs.o ts_typanalyze.o ts_utils.o
include $(top_srcdir)/src/backend/common.mk include $(top_srcdir)/src/backend/common.mk
......
/*-------------------------------------------------------------------------
*
* ts_selfuncs.c
* Selectivity estimation functions for text search operators.
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.1 2008/09/19 19:03:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/nodes.h"
#include "tsearch/ts_type.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
/*
* The default text search selectivity is chosen to be small enough to
* encourage indexscans for typical table densities. See selfuncs.h and
* DEFAULT_EQ_SEL for details.
*/
#define DEFAULT_TS_MATCH_SEL 0.005
/*
 * Lookup table type for binary searching through MCELEMs.
 *
 * Each entry pairs one most-common element with the frequency stored for
 * it, so that both can be reached through a single bsearch() hit.
 */
typedef struct
{
/* the element's text value, taken from the MCELEM values array */
text *element;
/* the matching entry of the MCELEM numbers array */
float4 frequency;
} TextFreq;
/*
 * Type of keys for bsearch'ing through an array of TextFreqs.
 *
 * Describes a tsquery operand as a pointer-plus-length pair; the string is
 * not NUL-terminated, so the length must always be consulted.
 */
typedef struct
{
/* first byte of the lexeme (points into the tsquery's operand area) */
char *lexeme;
/* length of the lexeme in bytes */
int length;
} LexemeKey;
static Selectivity tsquerysel(VariableStatData *vardata, Datum constval);
static Selectivity mcelem_tsquery_selec(TSQuery query,
Datum *mcelem, int nmcelem,
float4 *numbers, int nnumbers);
static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
TextFreq *lookup, int length, float4 minfreq);
static int compare_lexeme_textfreq(const void *e1, const void *e2);
/*
 * tsmatchsel -- restriction selectivity estimator for "@@"
 *
 * Serves both argument orders: tsvector @@ tsquery and tsquery @@ tsvector.
 * Returns a float8 Datum holding the estimated fraction of matching rows.
 */
Datum
tsmatchsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
#ifdef NOT_USED
	Oid			operator = PG_GETARG_OID(1);
#endif
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;

	/*
	 * Unless the clause looks like "variable @@ something" or
	 * "something @@ variable" there is nothing to examine (and nothing to
	 * release), so hand back the default estimate immediately.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);

	if (!IsA(other, Const))
	{
		/* The other side is not a constant: nothing useful can be done */
		selec = DEFAULT_TS_MATCH_SEL;
	}
	else if (((Const *) other)->constisnull)
	{
		/* "@@" is strict, so a NULL constant can never match any row */
		selec = 0.0;
	}
	else if (vardata.vartype == TSVECTOROID)
	{
		/*
		 * The Var is the tsvector side (in either argument order), which is
		 * the only side with usable statistics; the Const must then be the
		 * tsquery.
		 */
		Assert(((Const *) other)->consttype == TSQUERYOID);
		selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
	}
	else
	{
		/* The Var is the tsquery side; no useful statistics exist for it */
		selec = DEFAULT_TS_MATCH_SEL;
	}

	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
/*
* tsmatchjoinsel -- join selectivity of "@@"
*
* join selectivity function for tsvector @@ tsquery and tsquery @@ tsvector
*/
Datum
tsmatchjoinsel(PG_FUNCTION_ARGS)
{
/* for the moment we just punt */
PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
}
/*
 * tsquerysel -- "@@" selectivity for a tsvector Var vs a tsquery constant
 *
 * vardata:  statistics lookup result for the tsvector variable
 * constval: the (non-null) tsquery constant; the caller has already
 *			 verified its type
 *
 * Returns the estimated fraction of rows matching the query.  When the
 * variable has no statistics tuple, or the tuple carries no
 * most-common-elements (MCELEM) slot, DEFAULT_TS_MATCH_SEL is returned.
 * The result is not clamped here; the caller does that.
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	Form_pg_statistic stats;
	TSQuery		query;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	Selectivity selec;

	/* No stats at all, so we must punt */
	if (!HeapTupleIsValid(vardata->statsTuple))
		return (Selectivity) DEFAULT_TS_MATCH_SEL;

	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

	/* MCELEM will be an array of TEXT elements for a tsvector column */
	if (!get_attstatsslot(vardata->statsTuple,
						  TEXTOID, -1,
						  STATISTIC_KIND_MCELEM, InvalidOid,
						  &values, &nvalues,
						  &numbers, &nnumbers))
	{
		/* No most-common-elements info, so we must punt */
		return (Selectivity) DEFAULT_TS_MATCH_SEL;
	}

	/* The caller made sure the const is a TSQuery, so get it now */
	query = DatumGetTSQuery(constval);

	/* Derive the estimate from the most-common-elements slot */
	selec = mcelem_tsquery_selec(query, values, nvalues,
								 numbers, nnumbers);
	free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);

	/*
	 * ANALYZE computes MCELEM frequencies as fractions of the non-null rows
	 * only.  "@@" is strict, so null rows can never match; scale the
	 * estimate down so that it is a fraction of all rows.
	 */
	selec *= (1.0 - stats->stanullfrac);

	return selec;
}
/*
 * mcelem_tsquery_selec -- estimate tsquery selectivity from an MCELEM slot
 *
 * Repackages the parallel pg_statistic arrays (element values and their
 * frequencies) into one array of TextFreq entries and then lets
 * tsquery_opr_selec() walk the query against it.
 *
 * Returns DEFAULT_TS_MATCH_SEL if the slot does not have the expected
 * layout.
 */
static Selectivity
mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
					 float4 *numbers, int nnumbers)
{
	TextFreq   *table;
	float4		lowest;
	Selectivity result;
	int			idx;

	/*
	 * The Numbers array must be exactly two entries longer than the Values
	 * array: the two trailing cells carry the minimal and maximal
	 * frequency.  Anything else is a layout we don't understand, so punt.
	 */
	if (nnumbers != nmcelem + 2)
		return DEFAULT_TS_MATCH_SEL;

	/*
	 * The lowest stored frequency sits in the next-to-last Numbers cell;
	 * compute_tsvector_stats() put it there.  See ts_typanalyze.c
	 */
	lowest = numbers[nnumbers - 2];

	/*
	 * Build a single array of (element, frequency) pairs so that bsearch()
	 * can be applied to it.
	 */
	table = (TextFreq *) palloc(sizeof(TextFreq) * nmcelem);
	for (idx = 0; idx < nmcelem; idx++)
	{
		TextFreq   *slot = &table[idx];

		/*
		 * Text Datums extracted from an array are never compressed or
		 * stored out-of-line, which is what makes the VARSIZE_ANY* macros
		 * safe to apply to them later.
		 */
		Assert(!VARATT_IS_COMPRESSED(mcelem[idx]) && !VARATT_IS_EXTERNAL(mcelem[idx]));
		slot->element = (text *) DatumGetPointer(mcelem[idx]);
		slot->frequency = numbers[idx];
	}

	result = tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), table,
							   nmcelem, lowest);

	pfree(table);

	return result;
}
/*
 * tsquery_opr_selec -- recursively estimate the selectivity of a tsquery
 *
 * The query tree is walked starting at "item", combining estimates as:
 *
 *	 AND node:	selec(a) * selec(b)
 *	 OR node:	selec(a) + selec(b) - selec(a) * selec(b)
 *	 NOT node:	1 - selec(operand)
 *	 VAL node:	freq[val] if the value is found in MCELEM, otherwise
 *				the smaller of DEFAULT_TS_MATCH_SEL and min(freq[MCELEM]) / 2
 *
 * item:	current query node
 * operand: base of the query's operand (lexeme) storage
 * lookup:	MCELEM entries, "length" of them
 * minfreq: lowest frequency stored in MCELEM
 *
 * The MCELEM array is already sorted (see ts_typanalyze.c), so binary
 * search can be used to find freq[val].
 */
static Selectivity
tsquery_opr_selec(QueryItem *item, char *operand,
				  TextFreq *lookup, int length, float4 minfreq)
{
	Selectivity result,
				sel_a,
				sel_b;

	/* since this function recurses, it could be driven to stack overflow */
	check_stack_depth();

	if (item->type == QI_VAL)
	{
		QueryOperand *val = (QueryOperand *) item;
		LexemeKey	probe;
		TextFreq   *hit;

		/* Describe the operand's lexeme as a bsearch() key */
		probe.lexeme = operand + val->distance;
		probe.length = val->length;

		hit = (TextFreq *) bsearch(&probe, lookup, length,
								   sizeof(TextFreq),
								   compare_lexeme_textfreq);

		/*
		 * Found in MCELEM: the stored frequency is as precise an answer as
		 * ANALYZE could provide.
		 */
		if (hit != NULL)
			return (Selectivity) hit->frequency;

		/*
		 * Not found in MCELEM: punt, but never claim more than half of the
		 * lowest frequency that was stored.
		 */
		return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
	}

	/* Otherwise the current node is an operator */
	switch (item->operator.oper)
	{
		case OP_NOT:
			result = 1.0 - tsquery_opr_selec(item + 1, operand,
											 lookup, length, minfreq);
			break;

		case OP_AND:
		case OP_OR:
			/* one operand directly follows the operator item ... */
			sel_a = tsquery_opr_selec(item + 1, operand,
									  lookup, length, minfreq);
			/* ... the other is found via the operator's "left" offset */
			sel_b = tsquery_opr_selec(item + item->operator.left, operand,
									  lookup, length, minfreq);

			if (item->operator.oper == OP_AND)
				result = sel_a * sel_b;
			else
				result = sel_a + sel_b - sel_a * sel_b;
			break;

		default:
			elog(ERROR, "unrecognized operator: %d", item->operator.oper);
			result = 0;			/* keep compiler quiet */
			break;
	}

	/* Clamp intermediate results to stay sane despite roundoff error */
	CLAMP_PROBABILITY(result);

	return result;
}
/*
 * compare_lexeme_textfreq -- bsearch() comparator: LexemeKey vs TextFreq
 *
 * e1 is the search key (a lexeme given as pointer plus length, not
 * NUL-terminated); e2 is a TextFreq array member.  Ordering is by length
 * first and then byte-for-byte, because that is how the ANALYZE code sorted
 * the data before storing it in the statistic tuple.
 * See ts_typanalyze.c for details.
 */
static int
compare_lexeme_textfreq(const void *e1, const void *e2)
{
	const LexemeKey *probe = (const LexemeKey *) e1;
	const TextFreq *entry = (const TextFreq *) e2;
	int			keylen = probe->length;
	int			elemlen = VARSIZE_ANY_EXHDR(entry->element);

	/* Differing lengths settle the order without touching the bytes */
	if (keylen != elemlen)
		return (keylen > elemlen) ? 1 : -1;

	/* Same length: fall back on byte-for-byte comparison */
	return strncmp(probe->lexeme, VARDATA_ANY(entry->element), keylen);
}
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.1 2008/07/14 00:51:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.2 2008/09/19 19:03:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -43,7 +43,9 @@ static void compute_tsvector_stats(VacAttrStats *stats, ...@@ -43,7 +43,9 @@ static void compute_tsvector_stats(VacAttrStats *stats,
static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current); static void prune_lexemes_hashtable(HTAB *lexemes_tab, int b_current);
static uint32 lexeme_hash(const void *key, Size keysize); static uint32 lexeme_hash(const void *key, Size keysize);
static int lexeme_match(const void *key1, const void *key2, Size keysize); static int lexeme_match(const void *key1, const void *key2, Size keysize);
static int trackitem_compare_desc(const void *e1, const void *e2); static int lexeme_compare(const void *key1, const void *key2);
static int trackitem_compare_frequencies_desc(const void *e1, const void *e2);
static int trackitem_compare_lexemes(const void *e1, const void *e2);
/* /*
...@@ -247,6 +249,7 @@ compute_tsvector_stats(VacAttrStats *stats, ...@@ -247,6 +249,7 @@ compute_tsvector_stats(VacAttrStats *stats,
int i; int i;
TrackItem **sort_table; TrackItem **sort_table;
int track_len; int track_len;
int minfreq, maxfreq;
stats->stats_valid = true; stats->stats_valid = true;
/* Do the simple null-frac and average width stats */ /* Do the simple null-frac and average width stats */
...@@ -273,7 +276,7 @@ compute_tsvector_stats(VacAttrStats *stats, ...@@ -273,7 +276,7 @@ compute_tsvector_stats(VacAttrStats *stats,
Assert(i == track_len); Assert(i == track_len);
qsort(sort_table, track_len, sizeof(TrackItem *), qsort(sort_table, track_len, sizeof(TrackItem *),
trackitem_compare_desc); trackitem_compare_frequencies_desc);
/* Suppress any single-occurrence items */ /* Suppress any single-occurrence items */
while (track_len > 0) while (track_len > 0)
...@@ -287,6 +290,26 @@ compute_tsvector_stats(VacAttrStats *stats, ...@@ -287,6 +290,26 @@ compute_tsvector_stats(VacAttrStats *stats,
if (num_mcelem > track_len) if (num_mcelem > track_len)
num_mcelem = track_len; num_mcelem = track_len;
/* Grab the minimal and maximal frequencies that will get stored */
minfreq = sort_table[num_mcelem - 1]->frequency;
maxfreq = sort_table[0]->frequency;
/*
* We want to store statistics sorted on the lexeme value using first
* length, then byte-for-byte comparison. The reason for doing length
* comparison first is that we don't care about the ordering so long
* as it's consistent, and comparing lengths first gives us a chance
* to avoid a strncmp() call.
*
* This is different from what we do with scalar statistics -- they get
* sorted on frequencies. The rationale is that we usually search
* through most common elements looking for a specific value, so we can
* grab its frequency. When values are presorted we can employ binary
* search for that. See ts_selfuncs.c for a real usage scenario.
*/
qsort(sort_table, num_mcelem, sizeof(TrackItem *),
trackitem_compare_lexemes);
/* Generate MCELEM slot entry */ /* Generate MCELEM slot entry */
if (num_mcelem > 0) if (num_mcelem > 0)
{ {
...@@ -296,8 +319,15 @@ compute_tsvector_stats(VacAttrStats *stats, ...@@ -296,8 +319,15 @@ compute_tsvector_stats(VacAttrStats *stats,
/* Must copy the target values into anl_context */ /* Must copy the target values into anl_context */
old_context = MemoryContextSwitchTo(stats->anl_context); old_context = MemoryContextSwitchTo(stats->anl_context);
/*
* We sorted statistics on the lexeme value, but we want to be
* able to find out the minimal and maximal frequency without
* going through all the values. We keep those two extra
* frequencies in two extra cells in mcelem_freqs.
*/
mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum)); mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
mcelem_freqs = (float4 *) palloc(num_mcelem * sizeof(float4)); mcelem_freqs = (float4 *) palloc((num_mcelem + 2) * sizeof(float4));
for (i = 0; i < num_mcelem; i++) for (i = 0; i < num_mcelem; i++)
{ {
...@@ -308,12 +338,15 @@ compute_tsvector_stats(VacAttrStats *stats, ...@@ -308,12 +338,15 @@ compute_tsvector_stats(VacAttrStats *stats,
item->key.length)); item->key.length));
mcelem_freqs[i] = (double) item->frequency / (double) nonnull_cnt; mcelem_freqs[i] = (double) item->frequency / (double) nonnull_cnt;
} }
mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
mcelem_freqs[i] = (double) maxfreq / (double) nonnull_cnt;
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
stats->stakind[0] = STATISTIC_KIND_MCELEM; stats->stakind[0] = STATISTIC_KIND_MCELEM;
stats->staop[0] = TextEqualOperator; stats->staop[0] = TextEqualOperator;
stats->stanumbers[0] = mcelem_freqs; stats->stanumbers[0] = mcelem_freqs;
stats->numnumbers[0] = num_mcelem; /* See above comment about two extra frequency fields */
stats->numnumbers[0] = num_mcelem + 2;
stats->stavalues[0] = mcelem_values; stats->stavalues[0] = mcelem_values;
stats->numvalues[0] = num_mcelem; stats->numvalues[0] = num_mcelem;
/* We are storing text values */ /* We are storing text values */
...@@ -378,26 +411,49 @@ lexeme_hash(const void *key, Size keysize) ...@@ -378,26 +411,49 @@ lexeme_hash(const void *key, Size keysize)
*/ */
static int static int
lexeme_match(const void *key1, const void *key2, Size keysize) lexeme_match(const void *key1, const void *key2, Size keysize)
{
/* The keysize parameter is superfluous, the keys store their lengths */
return lexeme_compare(key1, key2);
}
/*
* Comparison function for lexemes.
*/
static int
lexeme_compare(const void *key1, const void *key2)
{ {
const LexemeHashKey *d1 = (const LexemeHashKey *) key1; const LexemeHashKey *d1 = (const LexemeHashKey *) key1;
const LexemeHashKey *d2 = (const LexemeHashKey *) key2; const LexemeHashKey *d2 = (const LexemeHashKey *) key2;
/* The lexemes need to have the same length, and be memcmp-equal */ /* First, compare by length */
if (d1->length == d2->length && if (d1->length > d2->length)
memcmp(d1->lexeme, d2->lexeme, d1->length) == 0)
return 0;
else
return 1; return 1;
else if (d1->length < d2->length)
return -1;
/* Lengths are equal, do a byte-by-byte comparison */
return strncmp(d1->lexeme, d2->lexeme, d1->length);
} }
/* /*
* qsort() comparator for TrackItems - LC style (descending sort) * qsort() comparator for sorting TrackItems on frequencies (descending sort)
*/ */
static int static int
trackitem_compare_desc(const void *e1, const void *e2) trackitem_compare_frequencies_desc(const void *e1, const void *e2)
{ {
const TrackItem * const *t1 = (const TrackItem * const *) e1; const TrackItem * const *t1 = (const TrackItem * const *) e1;
const TrackItem * const *t2 = (const TrackItem * const *) e2; const TrackItem * const *t2 = (const TrackItem * const *) e2;
return (*t2)->frequency - (*t1)->frequency; return (*t2)->frequency - (*t1)->frequency;
} }
/*
* qsort() comparator for sorting TrackItems on lexemes
*/
static int
trackitem_compare_lexemes(const void *e1, const void *e2)
{
const TrackItem * const *t1 = (const TrackItem * const *) e1;
const TrackItem * const *t2 = (const TrackItem * const *) e2;
return lexeme_compare(&(*t1)->key, &(*t2)->key);
}
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.486 2008/09/15 18:43:41 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.487 2008/09/19 19:03:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 200809151 #define CATALOG_VERSION_NO 200809191
#endif #endif
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.162 2008/08/16 00:01:37 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.163 2008/09/19 19:03:40 tgl Exp $
* *
* NOTES * NOTES
* the genbki.sh script reads this file and generates .bki * the genbki.sh script reads this file and generates .bki
...@@ -915,10 +915,10 @@ DATA(insert OID = 3630 ( "<>" PGNSP PGUID b f f 3614 3614 16 3630 3629 ts ...@@ -915,10 +915,10 @@ DATA(insert OID = 3630 ( "<>" PGNSP PGUID b f f 3614 3614 16 3630 3629 ts
DATA(insert OID = 3631 ( ">=" PGNSP PGUID b f f 3614 3614 16 3628 3627 tsvector_ge scalargtsel scalargtjoinsel )); DATA(insert OID = 3631 ( ">=" PGNSP PGUID b f f 3614 3614 16 3628 3627 tsvector_ge scalargtsel scalargtjoinsel ));
DATA(insert OID = 3632 ( ">" PGNSP PGUID b f f 3614 3614 16 3627 3628 tsvector_gt scalargtsel scalargtjoinsel )); DATA(insert OID = 3632 ( ">" PGNSP PGUID b f f 3614 3614 16 3627 3628 tsvector_gt scalargtsel scalargtjoinsel ));
DATA(insert OID = 3633 ( "||" PGNSP PGUID b f f 3614 3614 3614 0 0 tsvector_concat - - )); DATA(insert OID = 3633 ( "||" PGNSP PGUID b f f 3614 3614 3614 0 0 tsvector_concat - - ));
DATA(insert OID = 3636 ( "@@" PGNSP PGUID b f f 3614 3615 16 3637 0 ts_match_vq contsel contjoinsel )); DATA(insert OID = 3636 ( "@@" PGNSP PGUID b f f 3614 3615 16 3637 0 ts_match_vq tsmatchsel tsmatchjoinsel ));
DATA(insert OID = 3637 ( "@@" PGNSP PGUID b f f 3615 3614 16 3636 0 ts_match_qv contsel contjoinsel )); DATA(insert OID = 3637 ( "@@" PGNSP PGUID b f f 3615 3614 16 3636 0 ts_match_qv tsmatchsel tsmatchjoinsel ));
DATA(insert OID = 3660 ( "@@@" PGNSP PGUID b f f 3614 3615 16 3661 0 ts_match_vq contsel contjoinsel )); DATA(insert OID = 3660 ( "@@@" PGNSP PGUID b f f 3614 3615 16 3661 0 ts_match_vq tsmatchsel tsmatchjoinsel ));
DATA(insert OID = 3661 ( "@@@" PGNSP PGUID b f f 3615 3614 16 3660 0 ts_match_qv contsel contjoinsel )); DATA(insert OID = 3661 ( "@@@" PGNSP PGUID b f f 3615 3614 16 3660 0 ts_match_qv tsmatchsel tsmatchjoinsel ));
DATA(insert OID = 3674 ( "<" PGNSP PGUID b f f 3615 3615 16 3679 3678 tsquery_lt scalarltsel scalarltjoinsel )); DATA(insert OID = 3674 ( "<" PGNSP PGUID b f f 3615 3615 16 3679 3678 tsquery_lt scalarltsel scalarltjoinsel ));
DATA(insert OID = 3675 ( "<=" PGNSP PGUID b f f 3615 3615 16 3678 3679 tsquery_le scalarltsel scalarltjoinsel )); DATA(insert OID = 3675 ( "<=" PGNSP PGUID b f f 3615 3615 16 3678 3679 tsquery_le scalarltsel scalarltjoinsel ));
DATA(insert OID = 3676 ( "=" PGNSP PGUID b t f 3615 3615 16 3676 3677 tsquery_eq eqsel eqjoinsel )); DATA(insert OID = 3676 ( "=" PGNSP PGUID b t f 3615 3615 16 3676 3677 tsquery_eq eqsel eqjoinsel ));
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.514 2008/09/10 18:09:20 alvherre Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.515 2008/09/19 19:03:40 tgl Exp $
* *
* NOTES * NOTES
* The script catalog/genbki.sh reads this file and generates .bki * The script catalog/genbki.sh reads this file and generates .bki
...@@ -4434,6 +4434,10 @@ DESCR("GiST tsquery support"); ...@@ -4434,6 +4434,10 @@ DESCR("GiST tsquery support");
DATA(insert OID = 3701 ( gtsquery_consistent PGNSP PGUID 12 1 0 0 f f t f i 5 16 "2281 2281 23 26 2281" _null_ _null_ _null_ gtsquery_consistent _null_ _null_ _null_ )); DATA(insert OID = 3701 ( gtsquery_consistent PGNSP PGUID 12 1 0 0 f f t f i 5 16 "2281 2281 23 26 2281" _null_ _null_ _null_ gtsquery_consistent _null_ _null_ _null_ ));
DESCR("GiST tsquery support"); DESCR("GiST tsquery support");
DATA(insert OID = 3686 ( tsmatchsel PGNSP PGUID 12 1 0 0 f f t f s 4 701 "2281 26 2281 23" _null_ _null_ _null_ tsmatchsel _null_ _null_ _null_ ));
DESCR("restriction selectivity of tsvector @@ tsquery");
DATA(insert OID = 3687 ( tsmatchjoinsel PGNSP PGUID 12 1 0 0 f f t f s 5 701 "2281 26 2281 21 2281" _null_ _null_ _null_ tsmatchjoinsel _null_ _null_ _null_ ));
DESCR("join selectivity of tsvector @@ tsquery");
DATA(insert OID = 3688 ( ts_typanalyze PGNSP PGUID 12 1 0 0 f f t f s 1 16 "2281" _null_ _null_ _null_ ts_typanalyze _null_ _null_ _null_ )); DATA(insert OID = 3688 ( ts_typanalyze PGNSP PGUID 12 1 0 0 f f t f s 1 16 "2281" _null_ _null_ _null_ ts_typanalyze _null_ _null_ _null_ ));
DESCR("tsvector typanalyze"); DESCR("tsvector typanalyze");
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_statistic.h,v 1.36 2008/07/14 00:51:45 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_statistic.h,v 1.37 2008/09/19 19:03:41 tgl Exp $
* *
* NOTES * NOTES
* the genbki.sh script reads this file and generates .bki * the genbki.sh script reads this file and generates .bki
...@@ -243,8 +243,12 @@ typedef FormData_pg_statistic *Form_pg_statistic; ...@@ -243,8 +243,12 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* values. This is useful when the column datatype is an array or some other * values. This is useful when the column datatype is an array or some other
* type with identifiable elements (for instance, tsvector). staop contains * type with identifiable elements (for instance, tsvector). staop contains
* the equality operator appropriate to the element type. stavalues contains * the equality operator appropriate to the element type. stavalues contains
* the most common element values, and stanumbers their frequencies, with the * the most common element values, and stanumbers their frequencies. Unlike
* same rules as for MCV slots. * MCV slots, the values are sorted into order (to support binary search
* for a particular value). Since this puts the minimum and maximum
* frequencies at unpredictable spots in stanumbers, there are two extra
* members of stanumbers, holding copies of the minimum and maximum
* frequencies.
* *
* Note: in current usage for tsvector columns, the stavalues elements are of * Note: in current usage for tsvector columns, the stavalues elements are of
* type text, even though their representation within tsvector is not * type text, even though their representation within tsvector is not
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 1998-2008, PostgreSQL Global Development Group * Copyright (c) 1998-2008, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.13 2008/07/14 00:51:45 tgl Exp $ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.14 2008/09/19 19:03:41 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -153,6 +153,9 @@ extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS); ...@@ -153,6 +153,9 @@ extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS);
extern Datum ts_rankcd_ttf(PG_FUNCTION_ARGS); extern Datum ts_rankcd_ttf(PG_FUNCTION_ARGS);
extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS); extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
extern Datum tsmatchsel(PG_FUNCTION_ARGS);
extern Datum tsmatchjoinsel(PG_FUNCTION_ARGS);
extern Datum ts_typanalyze(PG_FUNCTION_ARGS); extern Datum ts_typanalyze(PG_FUNCTION_ARGS);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment