Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
Postgres FD Implementation
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Abuhujair Javed
Postgres FD Implementation
Commits
4e57668d
Commit
4e57668d
authored
Sep 19, 2008
by
Tom Lane
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Create a selectivity estimation function for the text search @@ operator.
Jan Urbanski
parent
e2b7d0c6
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
463 additions
and
30 deletions
+463
-30
doc/src/sgml/catalogs.sgml
doc/src/sgml/catalogs.sgml
+4
-1
src/backend/tsearch/Makefile
src/backend/tsearch/Makefile
+2
-2
src/backend/tsearch/ts_selfuncs.c
src/backend/tsearch/ts_selfuncs.c
+363
-0
src/backend/tsearch/ts_typanalyze.c
src/backend/tsearch/ts_typanalyze.c
+70
-14
src/include/catalog/catversion.h
src/include/catalog/catversion.h
+2
-2
src/include/catalog/pg_operator.h
src/include/catalog/pg_operator.h
+5
-5
src/include/catalog/pg_proc.h
src/include/catalog/pg_proc.h
+6
-2
src/include/catalog/pg_statistic.h
src/include/catalog/pg_statistic.h
+7
-3
src/include/tsearch/ts_type.h
src/include/tsearch/ts_type.h
+4
-1
No files found.
doc/src/sgml/catalogs.sgml
View file @
4e57668d
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.17
4 2008/09/15 18:43:41
tgl Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.17
5 2008/09/19 19:03:40
tgl Exp $ -->
<!--
<!--
Documentation of the system catalogs, directed toward PostgreSQL developers
Documentation of the system catalogs, directed toward PostgreSQL developers
-->
-->
...
@@ -6664,6 +6664,9 @@
...
@@ -6664,6 +6664,9 @@
A list of the frequencies of the most common values or elements,
A list of the frequencies of the most common values or elements,
i.e., number of occurrences of each divided by total number of rows.
i.e., number of occurrences of each divided by total number of rows.
(NULL when <structfield>most_common_vals</structfield> is.)
(NULL when <structfield>most_common_vals</structfield> is.)
For some datatypes such as <type>tsvector</>, it can also store some
additional information, making it longer than the
<structfield>most_common_vals</> array.
</entry>
</entry>
</row>
</row>
...
...
src/backend/tsearch/Makefile
View file @
4e57668d
...
@@ -4,7 +4,7 @@
...
@@ -4,7 +4,7 @@
#
#
# Copyright (c) 2006-2008, PostgreSQL Global Development Group
# Copyright (c) 2006-2008, PostgreSQL Global Development Group
#
#
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.
7 2008/07/14 00:51:45
tgl Exp $
# $PostgreSQL: pgsql/src/backend/tsearch/Makefile,v 1.
8 2008/09/19 19:03:40
tgl Exp $
#
#
#-------------------------------------------------------------------------
#-------------------------------------------------------------------------
subdir
=
src/backend/tsearch
subdir
=
src/backend/tsearch
...
@@ -19,7 +19,7 @@ DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
...
@@ -19,7 +19,7 @@ DICTFILES=synonym_sample.syn thesaurus_sample.ths hunspell_sample.affix \
OBJS
=
ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o
\
OBJS
=
ts_locale.o ts_parse.o wparser.o wparser_def.o dict.o
\
dict_simple.o dict_synonym.o dict_thesaurus.o
\
dict_simple.o dict_synonym.o dict_thesaurus.o
\
dict_ispell.o regis.o spell.o
\
dict_ispell.o regis.o spell.o
\
to_tsany.o ts_typanalyze.o ts_utils.o
to_tsany.o ts_
selfuncs.o ts_
typanalyze.o ts_utils.o
include
$(top_srcdir)/src/backend/common.mk
include
$(top_srcdir)/src/backend/common.mk
...
...
src/backend/tsearch/ts_selfuncs.c
0 → 100644
View file @
4e57668d
/*-------------------------------------------------------------------------
*
* ts_selfuncs.c
* Selectivity estimation functions for text search operators.
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.1 2008/09/19 19:03:40 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/nodes.h"
#include "tsearch/ts_type.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
/*
* The default text search selectivity is chosen to be small enough to
* encourage indexscans for typical table densities. See selfuncs.h and
* DEFAULT_EQ_SEL for details.
*/
#define DEFAULT_TS_MATCH_SEL 0.005
/*
 * Lookup-table entry used when binary searching through the MCELEM
 * (most-common-elements) statistics: one element paired with its frequency.
 */
typedef struct
{
	text	   *element;		/* the element's text value */
	float4		frequency;		/* frequency stored alongside that element */
} TextFreq;
/*
 * Search key handed to bsearch() when probing an array of TextFreqs: a
 * lexeme given as a pointer plus an explicit length (not NUL-terminated).
 */
typedef struct
{
	char	   *lexeme;			/* first byte of the lexeme */
	int			length;			/* number of bytes in the lexeme */
} LexemeKey;
/* Forward declarations of the file-local helpers defined below. */
static Selectivity tsquerysel(VariableStatData *vardata, Datum constval);
static Selectivity mcelem_tsquery_selec(TSQuery query,
										Datum *mcelem, int nmcelem,
										float4 *numbers, int nnumbers);
static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
									 TextFreq *lookup, int length,
									 float4 minfreq);
static int	compare_lexeme_textfreq(const void *e1, const void *e2);
/*
* tsmatchsel -- Selectivity of "@@"
*
* restriction selectivity function for tsvector @@ tsquery and
* tsquery @@ tsvector
*/
Datum
tsmatchsel
(
PG_FUNCTION_ARGS
)
{
PlannerInfo
*
root
=
(
PlannerInfo
*
)
PG_GETARG_POINTER
(
0
);
#ifdef NOT_USED
Oid
operator
=
PG_GETARG_OID
(
1
);
#endif
List
*
args
=
(
List
*
)
PG_GETARG_POINTER
(
2
);
int
varRelid
=
PG_GETARG_INT32
(
3
);
VariableStatData
vardata
;
Node
*
other
;
bool
varonleft
;
Selectivity
selec
;
/*
* If expression is not variable = something or something = variable, then
* punt and return a default estimate.
*/
if
(
!
get_restriction_variable
(
root
,
args
,
varRelid
,
&
vardata
,
&
other
,
&
varonleft
))
PG_RETURN_FLOAT8
(
DEFAULT_TS_MATCH_SEL
);
/*
* Can't do anything useful if the something is not a constant, either.
*/
if
(
!
IsA
(
other
,
Const
))
{
ReleaseVariableStats
(
vardata
);
PG_RETURN_FLOAT8
(
DEFAULT_TS_MATCH_SEL
);
}
/*
* The "@@" operator is strict, so we can cope with NULL right away
*/
if
(((
Const
*
)
other
)
->
constisnull
)
{
ReleaseVariableStats
(
vardata
);
PG_RETURN_FLOAT8
(
0
.
0
);
}
/*
* OK, there's a Var and a Const we're dealing with here. We need the Var
* to be a TSVector (or else we don't have any useful statistic for it).
* We have to check this because the Var might be the TSQuery not the
* TSVector.
*/
if
(
vardata
.
vartype
==
TSVECTOROID
)
{
/* tsvector @@ tsquery or the other way around */
Assert
(((
Const
*
)
other
)
->
consttype
==
TSQUERYOID
);
selec
=
tsquerysel
(
&
vardata
,
((
Const
*
)
other
)
->
constvalue
);
}
else
{
/* The Var is something we don't have useful statistics for */
selec
=
DEFAULT_TS_MATCH_SEL
;
}
ReleaseVariableStats
(
vardata
);
CLAMP_PROBABILITY
(
selec
);
PG_RETURN_FLOAT8
((
float8
)
selec
);
}
/*
* tsmatchjoinsel -- join selectivity of "@@"
*
* join selectivity function for tsvector @@ tsquery and tsquery @@ tsvector
*/
Datum
tsmatchjoinsel
(
PG_FUNCTION_ARGS
)
{
/* for the moment we just punt */
PG_RETURN_FLOAT8
(
DEFAULT_TS_MATCH_SEL
);
}
/*
 * tsquerysel -- "@@" selectivity for a tsvector Var vs. a tsquery constant
 *
 * vardata:  statistics lookup result for the tsvector variable.
 * constval: the tsquery constant; the caller has already checked that it
 *			 is a non-NULL tsquery.
 *
 * Returns the estimated fraction of the table's rows that match.  When the
 * column has a most-common-elements (MCELEM) slot the estimate is derived
 * from it; otherwise DEFAULT_TS_MATCH_SEL is returned.
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	Selectivity selec;
	TSQuery		query;

	/* The caller made sure the const is a TSQuery, so get it now */
	query = DatumGetTSQuery(constval);

	/*
	 * An empty tsquery contains no QueryItems, so walking it would read
	 * past the end of the query.  The match operator yields false for an
	 * empty query, hence nothing is selected.
	 */
	if (query->size == 0)
		return (Selectivity) 0.0;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		Datum	   *values;
		int			nvalues;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* MCELEM will be an array of TEXT elements for a tsvector column */
		if (get_attstatsslot(vardata->statsTuple,
							 TEXTOID, -1,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			/*
			 * There is a most-common-elements slot for the tsvector Var, so
			 * use that.
			 */
			selec = mcelem_tsquery_selec(query, values, nvalues,
										 numbers, nnumbers);
			free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);

			/*
			 * ANALYZE computes MCELEM frequencies relative to the non-null
			 * rows only, and a NULL tsvector never matches, so scale the
			 * result down to a fraction of all rows.
			 */
			selec *= (1.0 - stats->stanullfrac);
		}
		else
		{
			/* No most-common-elements info, so we must punt */
			selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
		}
	}
	else
	{
		/* No stats at all, so we must punt */
		selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
	}

	return selec;
}
/*
 * mcelem_tsquery_selec -- estimate tsquery selectivity from MCELEM data
 *
 * Repackages the two parallel pg_statistic arrays (element values in
 * mcelem[], frequencies in numbers[]) into a single TextFreq array that
 * bsearch() can probe, then evaluates the query tree against it.
 *
 * Returns DEFAULT_TS_MATCH_SEL if the arrays do not have the expected
 * relative sizes.
 */
static Selectivity
mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
					 float4 *numbers, int nnumbers)
{
	TextFreq   *table;
	TextFreq   *entry;
	float4		lowest_freq;
	Selectivity result;
	int			idx;

	/*
	 * numbers[] must carry exactly two trailing extras (the minimal and
	 * maximal frequency) beyond one entry per element.  Anything else is
	 * not data we understand, so punt.
	 */
	if (nnumbers != nmcelem + 2)
		return DEFAULT_TS_MATCH_SEL;

	/* Zip the two arrays into one array of (element, frequency) pairs. */
	table = (TextFreq *) palloc(sizeof(TextFreq) * nmcelem);
	for (idx = 0, entry = table; idx < nmcelem; idx++, entry++)
	{
		/*
		 * These text Datums were taken out of an array, so they can be
		 * neither compressed nor stored out-of-line; VARSIZE_ANY* is safe
		 * to apply to them later.
		 */
		Assert(!VARATT_IS_COMPRESSED(mcelem[idx]) &&
			   !VARATT_IS_EXTERNAL(mcelem[idx]));
		entry->element = (text *) DatumGetPointer(mcelem[idx]);
		entry->frequency = numbers[idx];
	}

	/*
	 * The lowest stored frequency sits in the next-to-last cell of
	 * numbers[]; compute_tsvector_stats() put it there (see
	 * ts_typanalyze.c).
	 */
	lowest_freq = numbers[nnumbers - 2];

	result = tsquery_opr_selec(GETQUERY(query), GETOPERAND(query),
							   table, nmcelem, lowest_freq);

	pfree(table);

	return result;
}
/*
 * tsquery_opr_selec -- recursive selectivity of one tsquery node
 *
 * Walks the tsquery in preorder and combines estimates as follows:
 *
 *	 AND node:	selec(left_oper) * selec(right_oper)
 *	 OR node:	selec(left_oper) + selec(right_oper) -
 *					selec(left_oper) * selec(right_oper)
 *	 NOT node:	1 - selec(oper)
 *	 VAL node:	freq[val] if the value is found in MCELEM, otherwise
 *				the smaller of DEFAULT_TS_MATCH_SEL and minfreq / 2
 *
 * item is the current node, operand the base of the query's operand
 * strings, lookup/length the MCELEM table and its entry count, and minfreq
 * the lowest frequency stored in that table.
 *
 * The MCELEM array is already sorted (see ts_typanalyze.c), which is what
 * makes the binary search below valid.
 */
static Selectivity
tsquery_opr_selec(QueryItem *item, char *operand, TextFreq *lookup,
				  int length, float4 minfreq)
{
	Selectivity result;
	Selectivity lhs;
	Selectivity rhs;

	/* recursion depth is driven by the query, so guard the stack */
	check_stack_depth();

	if (item->type == QI_VAL)
	{
		QueryOperand *val = (QueryOperand *) item;
		LexemeKey	probe;
		TextFreq   *hit;

		/* Describe the lexeme (pointer + length) for the comparator. */
		probe.lexeme = operand + val->distance;
		probe.length = val->length;

		hit = (TextFreq *) bsearch(&probe, lookup, length,
								   sizeof(TextFreq),
								   compare_lexeme_textfreq);

		/*
		 * Not among the most common elements: punt, but never claim more
		 * than half of the lowest frequency that was stored.
		 */
		if (hit == NULL)
			return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);

		/* Found: this is as precise as ANALYZE could make it. */
		return (Selectivity) hit->frequency;
	}

	/* Otherwise the node is an operator */
	switch (item->operator.oper)
	{
		case OP_NOT:
			result = 1.0 - tsquery_opr_selec(item + 1, operand,
											 lookup, length, minfreq);
			break;

		case OP_AND:
		case OP_OR:
			/* one argument directly follows; the other is "left" away */
			lhs = tsquery_opr_selec(item + 1, operand,
									lookup, length, minfreq);
			rhs = tsquery_opr_selec(item + item->operator.left, operand,
									lookup, length, minfreq);
			if (item->operator.oper == OP_AND)
				result = lhs * rhs;
			else
				result = lhs + rhs - lhs * rhs;
			break;

		default:
			elog(ERROR, "unrecognized operator: %d", item->operator.oper);
			result = 0;			/* keep compiler quiet */
			break;
	}

	/* Keep intermediate values inside [0, 1] despite roundoff error */
	CLAMP_PROBABILITY(result);

	return result;
}
/*
 * compare_lexeme_textfreq -- bsearch() comparator: LexemeKey vs. TextFreq
 *
 * e1 is the search key (a lexeme with explicit length, not NUL-terminated),
 * e2 an entry of the TextFreq array.  Ordering is by length first and then
 * byte-for-byte, because that is the order in which the ANALYZE code sorted
 * the data before storing it in the statistic tuple (see ts_typanalyze.c).
 */
static int
compare_lexeme_textfreq(const void *e1, const void *e2)
{
	const LexemeKey *probe = (const LexemeKey *) e1;
	const TextFreq *entry = (const TextFreq *) e2;
	int			probe_len = probe->length;
	int			entry_len = VARSIZE_ANY_EXHDR(entry->element);

	/* Differing lengths settle the order without touching the bytes */
	if (probe_len != entry_len)
		return (probe_len > entry_len) ? 1 : -1;

	/* Same length: let the bytes decide */
	return strncmp(probe->lexeme, VARDATA_ANY(entry->element), probe_len);
}
src/backend/tsearch/ts_typanalyze.c
View file @
4e57668d
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
*
*
*
*
* IDENTIFICATION
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.
1 2008/07/14 00:51:45
tgl Exp $
* $PostgreSQL: pgsql/src/backend/tsearch/ts_typanalyze.c,v 1.
2 2008/09/19 19:03:40
tgl Exp $
*
*
*-------------------------------------------------------------------------
*-------------------------------------------------------------------------
*/
*/
...
@@ -43,7 +43,9 @@ static void compute_tsvector_stats(VacAttrStats *stats,
...
@@ -43,7 +43,9 @@ static void compute_tsvector_stats(VacAttrStats *stats,
static
void
prune_lexemes_hashtable
(
HTAB
*
lexemes_tab
,
int
b_current
);
static
void
prune_lexemes_hashtable
(
HTAB
*
lexemes_tab
,
int
b_current
);
static
uint32
lexeme_hash
(
const
void
*
key
,
Size
keysize
);
static
uint32
lexeme_hash
(
const
void
*
key
,
Size
keysize
);
static
int
lexeme_match
(
const
void
*
key1
,
const
void
*
key2
,
Size
keysize
);
static
int
lexeme_match
(
const
void
*
key1
,
const
void
*
key2
,
Size
keysize
);
static
int
trackitem_compare_desc
(
const
void
*
e1
,
const
void
*
e2
);
static
int
lexeme_compare
(
const
void
*
key1
,
const
void
*
key2
);
static
int
trackitem_compare_frequencies_desc
(
const
void
*
e1
,
const
void
*
e2
);
static
int
trackitem_compare_lexemes
(
const
void
*
e1
,
const
void
*
e2
);
/*
/*
...
@@ -247,6 +249,7 @@ compute_tsvector_stats(VacAttrStats *stats,
...
@@ -247,6 +249,7 @@ compute_tsvector_stats(VacAttrStats *stats,
int
i
;
int
i
;
TrackItem
**
sort_table
;
TrackItem
**
sort_table
;
int
track_len
;
int
track_len
;
int
minfreq
,
maxfreq
;
stats
->
stats_valid
=
true
;
stats
->
stats_valid
=
true
;
/* Do the simple null-frac and average width stats */
/* Do the simple null-frac and average width stats */
...
@@ -273,7 +276,7 @@ compute_tsvector_stats(VacAttrStats *stats,
...
@@ -273,7 +276,7 @@ compute_tsvector_stats(VacAttrStats *stats,
Assert
(
i
==
track_len
);
Assert
(
i
==
track_len
);
qsort
(
sort_table
,
track_len
,
sizeof
(
TrackItem
*
),
qsort
(
sort_table
,
track_len
,
sizeof
(
TrackItem
*
),
trackitem_compare_desc
);
trackitem_compare_
frequencies_
desc
);
/* Suppress any single-occurrence items */
/* Suppress any single-occurrence items */
while
(
track_len
>
0
)
while
(
track_len
>
0
)
...
@@ -287,6 +290,26 @@ compute_tsvector_stats(VacAttrStats *stats,
...
@@ -287,6 +290,26 @@ compute_tsvector_stats(VacAttrStats *stats,
if
(
num_mcelem
>
track_len
)
if
(
num_mcelem
>
track_len
)
num_mcelem
=
track_len
;
num_mcelem
=
track_len
;
/* Grab the minimal and maximal frequencies that will get stored */
minfreq
=
sort_table
[
num_mcelem
-
1
]
->
frequency
;
maxfreq
=
sort_table
[
0
]
->
frequency
;
/*
* We want to store statistics sorted on the lexeme value using first
* length, then byte-for-byte comparison. The reason for doing length
* comparison first is that we don't care about the ordering so long
* as it's consistent, and comparing lengths first gives us a chance
* to avoid a strncmp() call.
*
* This is different from what we do with scalar statistics -- they get
* sorted on frequencies. The rationale is that we usually search
* through most common elements looking for a specific value, so we can
* grab its frequency. When values are presorted we can employ binary
* search for that. See ts_selfuncs.c for a real usage scenario.
*/
qsort
(
sort_table
,
num_mcelem
,
sizeof
(
TrackItem
*
),
trackitem_compare_lexemes
);
/* Generate MCELEM slot entry */
/* Generate MCELEM slot entry */
if
(
num_mcelem
>
0
)
if
(
num_mcelem
>
0
)
{
{
...
@@ -296,8 +319,15 @@ compute_tsvector_stats(VacAttrStats *stats,
...
@@ -296,8 +319,15 @@ compute_tsvector_stats(VacAttrStats *stats,
/* Must copy the target values into anl_context */
/* Must copy the target values into anl_context */
old_context
=
MemoryContextSwitchTo
(
stats
->
anl_context
);
old_context
=
MemoryContextSwitchTo
(
stats
->
anl_context
);
/*
* We sorted statistics on the lexeme value, but we want to be
* able to find out the minimal and maximal frequency without
* going through all the values. We keep those two extra
* frequencies in two extra cells in mcelem_freqs.
*/
mcelem_values
=
(
Datum
*
)
palloc
(
num_mcelem
*
sizeof
(
Datum
));
mcelem_values
=
(
Datum
*
)
palloc
(
num_mcelem
*
sizeof
(
Datum
));
mcelem_freqs
=
(
float4
*
)
palloc
(
num_mcelem
*
sizeof
(
float4
));
mcelem_freqs
=
(
float4
*
)
palloc
(
(
num_mcelem
+
2
)
*
sizeof
(
float4
));
for
(
i
=
0
;
i
<
num_mcelem
;
i
++
)
for
(
i
=
0
;
i
<
num_mcelem
;
i
++
)
{
{
...
@@ -308,12 +338,15 @@ compute_tsvector_stats(VacAttrStats *stats,
...
@@ -308,12 +338,15 @@ compute_tsvector_stats(VacAttrStats *stats,
item
->
key
.
length
));
item
->
key
.
length
));
mcelem_freqs
[
i
]
=
(
double
)
item
->
frequency
/
(
double
)
nonnull_cnt
;
mcelem_freqs
[
i
]
=
(
double
)
item
->
frequency
/
(
double
)
nonnull_cnt
;
}
}
mcelem_freqs
[
i
++
]
=
(
double
)
minfreq
/
(
double
)
nonnull_cnt
;
mcelem_freqs
[
i
]
=
(
double
)
maxfreq
/
(
double
)
nonnull_cnt
;
MemoryContextSwitchTo
(
old_context
);
MemoryContextSwitchTo
(
old_context
);
stats
->
stakind
[
0
]
=
STATISTIC_KIND_MCELEM
;
stats
->
stakind
[
0
]
=
STATISTIC_KIND_MCELEM
;
stats
->
staop
[
0
]
=
TextEqualOperator
;
stats
->
staop
[
0
]
=
TextEqualOperator
;
stats
->
stanumbers
[
0
]
=
mcelem_freqs
;
stats
->
stanumbers
[
0
]
=
mcelem_freqs
;
stats
->
numnumbers
[
0
]
=
num_mcelem
;
/* See above comment about two extra frequency fields */
stats
->
numnumbers
[
0
]
=
num_mcelem
+
2
;
stats
->
stavalues
[
0
]
=
mcelem_values
;
stats
->
stavalues
[
0
]
=
mcelem_values
;
stats
->
numvalues
[
0
]
=
num_mcelem
;
stats
->
numvalues
[
0
]
=
num_mcelem
;
/* We are storing text values */
/* We are storing text values */
...
@@ -379,25 +412,48 @@ lexeme_hash(const void *key, Size keysize)
...
@@ -379,25 +412,48 @@ lexeme_hash(const void *key, Size keysize)
static
int
static
int
lexeme_match
(
const
void
*
key1
,
const
void
*
key2
,
Size
keysize
)
lexeme_match
(
const
void
*
key1
,
const
void
*
key2
,
Size
keysize
)
{
{
const
LexemeHashKey
*
d1
=
(
const
LexemeHashKey
*
)
key1
;
/* The keysize parameter is superfluous, the keys store their lengths */
const
LexemeHashKey
*
d2
=
(
const
LexemeHashKey
*
)
key2
;
return
lexeme_compare
(
key1
,
key2
);
}
/* The lexemes need to have the same length, and be memcmp-equal */
/*
if
(
d1
->
length
==
d2
->
length
&&
* Comparison function for lexemes.
memcmp
(
d1
->
lexeme
,
d2
->
lexeme
,
d1
->
length
)
==
0
)
*/
return
0
;
static
int
else
lexeme_compare
(
const
void
*
key1
,
const
void
*
key2
)
{
const
LexemeHashKey
*
d1
=
(
const
LexemeHashKey
*
)
key1
;
const
LexemeHashKey
*
d2
=
(
const
LexemeHashKey
*
)
key2
;
/* First, compare by length */
if
(
d1
->
length
>
d2
->
length
)
return
1
;
return
1
;
else
if
(
d1
->
length
<
d2
->
length
)
return
-
1
;
/* Lengths are equal, do a byte-by-byte comparison */
return
strncmp
(
d1
->
lexeme
,
d2
->
lexeme
,
d1
->
length
);
}
}
/*
/*
* qsort() comparator for
TrackItems - LC style
(descending sort)
* qsort() comparator for
sorting TrackItems on frequencies
(descending sort)
*/
*/
static
int
static
int
trackitem_compare_desc
(
const
void
*
e1
,
const
void
*
e2
)
trackitem_compare_
frequencies_
desc
(
const
void
*
e1
,
const
void
*
e2
)
{
{
const
TrackItem
*
const
*
t1
=
(
const
TrackItem
*
const
*
)
e1
;
const
TrackItem
*
const
*
t1
=
(
const
TrackItem
*
const
*
)
e1
;
const
TrackItem
*
const
*
t2
=
(
const
TrackItem
*
const
*
)
e2
;
const
TrackItem
*
const
*
t2
=
(
const
TrackItem
*
const
*
)
e2
;
return
(
*
t2
)
->
frequency
-
(
*
t1
)
->
frequency
;
return
(
*
t2
)
->
frequency
-
(
*
t1
)
->
frequency
;
}
}
/*
 * trackitem_compare_lexemes -- qsort() comparator over TrackItem pointers
 *
 * Each array slot holds a pointer to a TrackItem; the items are ordered by
 * their lexeme keys, as decided by lexeme_compare().
 */
static int
trackitem_compare_lexemes(const void *e1, const void *e2)
{
	const TrackItem *lhs = *(const TrackItem *const *) e1;
	const TrackItem *rhs = *(const TrackItem *const *) e2;

	return lexeme_compare(&lhs->key, &rhs->key);
}
src/include/catalog/catversion.h
View file @
4e57668d
...
@@ -37,7 +37,7 @@
...
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.48
6 2008/09/15 18:43:41
tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.48
7 2008/09/19 19:03:40
tgl Exp $
*
*
*-------------------------------------------------------------------------
*-------------------------------------------------------------------------
*/
*/
...
@@ -53,6 +53,6 @@
...
@@ -53,6 +53,6 @@
*/
*/
/* yyyymmddN */
/* yyyymmddN */
#define CATALOG_VERSION_NO 2008091
5
1
#define CATALOG_VERSION_NO 2008091
9
1
#endif
#endif
src/include/catalog/pg_operator.h
View file @
4e57668d
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.16
2 2008/08/16 00:01:37
tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.16
3 2008/09/19 19:03:40
tgl Exp $
*
*
* NOTES
* NOTES
* the genbki.sh script reads this file and generates .bki
* the genbki.sh script reads this file and generates .bki
...
@@ -915,10 +915,10 @@ DATA(insert OID = 3630 ( "<>" PGNSP PGUID b f f 3614 3614 16 3630 3629 ts
...
@@ -915,10 +915,10 @@ DATA(insert OID = 3630 ( "<>" PGNSP PGUID b f f 3614 3614 16 3630 3629 ts
DATA
(
insert
OID
=
3631
(
">="
PGNSP
PGUID
b
f
f
3614
3614
16
3628
3627
tsvector_ge
scalargtsel
scalargtjoinsel
));
DATA
(
insert
OID
=
3631
(
">="
PGNSP
PGUID
b
f
f
3614
3614
16
3628
3627
tsvector_ge
scalargtsel
scalargtjoinsel
));
DATA
(
insert
OID
=
3632
(
">"
PGNSP
PGUID
b
f
f
3614
3614
16
3627
3628
tsvector_gt
scalargtsel
scalargtjoinsel
));
DATA
(
insert
OID
=
3632
(
">"
PGNSP
PGUID
b
f
f
3614
3614
16
3627
3628
tsvector_gt
scalargtsel
scalargtjoinsel
));
DATA
(
insert
OID
=
3633
(
"||"
PGNSP
PGUID
b
f
f
3614
3614
3614
0
0
tsvector_concat
-
-
));
DATA
(
insert
OID
=
3633
(
"||"
PGNSP
PGUID
b
f
f
3614
3614
3614
0
0
tsvector_concat
-
-
));
DATA
(
insert
OID
=
3636
(
"@@"
PGNSP
PGUID
b
f
f
3614
3615
16
3637
0
ts_match_vq
contsel
contjoinsel
));
DATA
(
insert
OID
=
3636
(
"@@"
PGNSP
PGUID
b
f
f
3614
3615
16
3637
0
ts_match_vq
tsmatchsel
tsmatchjoinsel
));
DATA
(
insert
OID
=
3637
(
"@@"
PGNSP
PGUID
b
f
f
3615
3614
16
3636
0
ts_match_qv
contsel
contjoinsel
));
DATA
(
insert
OID
=
3637
(
"@@"
PGNSP
PGUID
b
f
f
3615
3614
16
3636
0
ts_match_qv
tsmatchsel
tsmatchjoinsel
));
DATA
(
insert
OID
=
3660
(
"@@@"
PGNSP
PGUID
b
f
f
3614
3615
16
3661
0
ts_match_vq
contsel
contjoinsel
));
DATA
(
insert
OID
=
3660
(
"@@@"
PGNSP
PGUID
b
f
f
3614
3615
16
3661
0
ts_match_vq
tsmatchsel
tsmatchjoinsel
));
DATA
(
insert
OID
=
3661
(
"@@@"
PGNSP
PGUID
b
f
f
3615
3614
16
3660
0
ts_match_qv
contsel
contjoinsel
));
DATA
(
insert
OID
=
3661
(
"@@@"
PGNSP
PGUID
b
f
f
3615
3614
16
3660
0
ts_match_qv
tsmatchsel
tsmatchjoinsel
));
DATA
(
insert
OID
=
3674
(
"<"
PGNSP
PGUID
b
f
f
3615
3615
16
3679
3678
tsquery_lt
scalarltsel
scalarltjoinsel
));
DATA
(
insert
OID
=
3674
(
"<"
PGNSP
PGUID
b
f
f
3615
3615
16
3679
3678
tsquery_lt
scalarltsel
scalarltjoinsel
));
DATA
(
insert
OID
=
3675
(
"<="
PGNSP
PGUID
b
f
f
3615
3615
16
3678
3679
tsquery_le
scalarltsel
scalarltjoinsel
));
DATA
(
insert
OID
=
3675
(
"<="
PGNSP
PGUID
b
f
f
3615
3615
16
3678
3679
tsquery_le
scalarltsel
scalarltjoinsel
));
DATA
(
insert
OID
=
3676
(
"="
PGNSP
PGUID
b
t
f
3615
3615
16
3676
3677
tsquery_eq
eqsel
eqjoinsel
));
DATA
(
insert
OID
=
3676
(
"="
PGNSP
PGUID
b
t
f
3615
3615
16
3676
3677
tsquery_eq
eqsel
eqjoinsel
));
...
...
src/include/catalog/pg_proc.h
View file @
4e57668d
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.51
4 2008/09/10 18:09:20 alvherre
Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.51
5 2008/09/19 19:03:40 tgl
Exp $
*
*
* NOTES
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
* The script catalog/genbki.sh reads this file and generates .bki
...
@@ -4434,7 +4434,11 @@ DESCR("GiST tsquery support");
...
@@ -4434,7 +4434,11 @@ DESCR("GiST tsquery support");
DATA
(
insert
OID
=
3701
(
gtsquery_consistent
PGNSP
PGUID
12
1
0
0
f
f
t
f
i
5
16
"2281 2281 23 26 2281"
_null_
_null_
_null_
gtsquery_consistent
_null_
_null_
_null_
));
DATA
(
insert
OID
=
3701
(
gtsquery_consistent
PGNSP
PGUID
12
1
0
0
f
f
t
f
i
5
16
"2281 2281 23 26 2281"
_null_
_null_
_null_
gtsquery_consistent
_null_
_null_
_null_
));
DESCR
(
"GiST tsquery support"
);
DESCR
(
"GiST tsquery support"
);
DATA
(
insert
OID
=
3688
(
ts_typanalyze
PGNSP
PGUID
12
1
0
0
f
f
t
f
s
1
16
"2281"
_null_
_null_
_null_
ts_typanalyze
_null_
_null_
_null_
));
DATA
(
insert
OID
=
3686
(
tsmatchsel
PGNSP
PGUID
12
1
0
0
f
f
t
f
s
4
701
"2281 26 2281 23"
_null_
_null_
_null_
tsmatchsel
_null_
_null_
_null_
));
DESCR
(
"restriction selectivity of tsvector @@ tsquery"
);
DATA
(
insert
OID
=
3687
(
tsmatchjoinsel
PGNSP
PGUID
12
1
0
0
f
f
t
f
s
5
701
"2281 26 2281 21 2281"
_null_
_null_
_null_
tsmatchjoinsel
_null_
_null_
_null_
));
DESCR
(
"join selectivity of tsvector @@ tsquery"
);
DATA
(
insert
OID
=
3688
(
ts_typanalyze
PGNSP
PGUID
12
1
0
0
f
f
t
f
s
1
16
"2281"
_null_
_null_
_null_
ts_typanalyze
_null_
_null_
_null_
));
DESCR
(
"tsvector typanalyze"
);
DESCR
(
"tsvector typanalyze"
);
DATA
(
insert
OID
=
3689
(
ts_stat
PGNSP
PGUID
12
10
10000
0
f
f
t
t
v
1
2249
"25"
"{25,25,23,23}"
"{i,o,o,o}"
"{query,word,ndoc,nentry}"
ts_stat1
_null_
_null_
_null_
));
DATA
(
insert
OID
=
3689
(
ts_stat
PGNSP
PGUID
12
10
10000
0
f
f
t
t
v
1
2249
"25"
"{25,25,23,23}"
"{i,o,o,o}"
"{query,word,ndoc,nentry}"
ts_stat1
_null_
_null_
_null_
));
...
...
src/include/catalog/pg_statistic.h
View file @
4e57668d
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* $PostgreSQL: pgsql/src/include/catalog/pg_statistic.h,v 1.3
6 2008/07/14 00:51:45
tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_statistic.h,v 1.3
7 2008/09/19 19:03:41
tgl Exp $
*
*
* NOTES
* NOTES
* the genbki.sh script reads this file and generates .bki
* the genbki.sh script reads this file and generates .bki
...
@@ -243,8 +243,12 @@ typedef FormData_pg_statistic *Form_pg_statistic;
...
@@ -243,8 +243,12 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* values. This is useful when the column datatype is an array or some other
* values. This is useful when the column datatype is an array or some other
* type with identifiable elements (for instance, tsvector). staop contains
* type with identifiable elements (for instance, tsvector). staop contains
* the equality operator appropriate to the element type. stavalues contains
* the equality operator appropriate to the element type. stavalues contains
* the most common element values, and stanumbers their frequencies, with the
* the most common element values, and stanumbers their frequencies. Unlike
* same rules as for MCV slots.
* MCV slots, the values are sorted into order (to support binary search
* for a particular value). Since this puts the minimum and maximum
* frequencies at unpredictable spots in stanumbers, there are two extra
* members of stanumbers, holding copies of the minimum and maximum
* frequencies.
*
*
* Note: in current usage for tsvector columns, the stavalues elements are of
* Note: in current usage for tsvector columns, the stavalues elements are of
* type text, even though their representation within tsvector is not
* type text, even though their representation within tsvector is not
...
...
src/include/tsearch/ts_type.h
View file @
4e57668d
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
*
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
*
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1
3 2008/07/14 00:51:45
tgl Exp $
* $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1
4 2008/09/19 19:03:41
tgl Exp $
*
*
*-------------------------------------------------------------------------
*-------------------------------------------------------------------------
*/
*/
...
@@ -153,6 +153,9 @@ extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS);
...
@@ -153,6 +153,9 @@ extern Datum ts_rankcd_wtt(PG_FUNCTION_ARGS);
extern
Datum
ts_rankcd_ttf
(
PG_FUNCTION_ARGS
);
extern
Datum
ts_rankcd_ttf
(
PG_FUNCTION_ARGS
);
extern
Datum
ts_rankcd_wttf
(
PG_FUNCTION_ARGS
);
extern
Datum
ts_rankcd_wttf
(
PG_FUNCTION_ARGS
);
extern
Datum
tsmatchsel
(
PG_FUNCTION_ARGS
);
extern
Datum
tsmatchjoinsel
(
PG_FUNCTION_ARGS
);
extern
Datum
ts_typanalyze
(
PG_FUNCTION_ARGS
);
extern
Datum
ts_typanalyze
(
PG_FUNCTION_ARGS
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment