Commit d2158b02 authored by Teodor Sigaev's avatar Teodor Sigaev

* Add support for NULLs to GiST.

* some refactoring and simplification of the code in gistutil.c and gist.c
* now in some cases the user-defined picksplit method can be called
  for a non-first column of the index, but there is still
	room to do more here.
* small docs fix related to NULL support.
parent 86722057
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.11 2006/05/10 23:18:38 tgl Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.12 2006/05/24 11:01:39 teodor Exp $ -->
<chapter id="indexam">
<title>Index Access Method Interface Definition</title>
......@@ -126,8 +126,7 @@
used to scan for rows with <literal>a = 4</literal>, which is wrong if the
index omits rows where <literal>b</> is null.
It is, however, OK to omit rows where the first indexed column is null.
(GiST currently does so.) Thus,
<structfield>amindexnulls</structfield> should be set true only if the
Thus, <structfield>amindexnulls</structfield> should be set true only if the
index access method indexes all rows, including arbitrary combinations of
null values.
</para>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.56 2006/01/18 21:29:45 momjian Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.57 2006/05/24 11:01:39 teodor Exp $ -->
<chapter id="indexes">
<title id="indexes-title">Indexes</title>
......@@ -290,13 +290,13 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
</para>
<para>
A multicolumn GiST index can only be used when there is a query condition
on its leading column. Conditions on additional columns restrict the
entries returned by the index, but the condition on the first column is the
most important one for determining how much of the index needs to be
scanned. A GiST index will be relatively ineffective if its first column
has only a few distinct values, even if there are many distinct values in
additional columns.
A multicolumn GiST index can be used with query conditions that
involve any subset of the index's columns. Conditions on additional
columns restrict the entries returned by the index, but the condition on
the first column is the most important one for determining how much of
the index needs to be scanned. A GiST index will be relatively
ineffective if its first column has only a few distinct values, even if
there are many distinct values in additional columns.
</para>
<para>
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.136 2006/05/19 16:15:17 teodor Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.137 2006/05/24 11:01:39 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -181,32 +181,13 @@ gistbuildCallback(Relation index,
{
GISTBuildState *buildstate = (GISTBuildState *) state;
IndexTuple itup;
GISTENTRY tmpcentry;
int i;
MemoryContext oldCtx;
/* GiST cannot index tuples with leading NULLs */
if (isnull[0])
return;
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
/* immediately compress keys to normalize */
for (i = 0; i < buildstate->numindexattrs; i++)
{
if (isnull[i])
values[i] = (Datum) 0;
else
{
gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpcentry.key;
}
}
/* form an index tuple and point it at the heap tuple */
itup = index_form_tuple(buildstate->giststate.tupdesc, values, isnull);
itup = gistFormTuple(&buildstate->giststate, index,
values, NULL /* size is currently bogus */, isnull);
itup->t_tid = htup->t_self;
/*
......@@ -243,34 +224,16 @@ gistinsert(PG_FUNCTION_ARGS)
#endif
IndexTuple itup;
GISTSTATE giststate;
GISTENTRY tmpentry;
int i;
MemoryContext oldCtx;
MemoryContext insertCtx;
/* GiST cannot index tuples with leading NULLs */
if (isnull[0])
PG_RETURN_BOOL(false);
insertCtx = createTempGistContext();
oldCtx = MemoryContextSwitchTo(insertCtx);
initGISTstate(&giststate, r);
/* immediately compress keys to normalize */
for (i = 0; i < r->rd_att->natts; i++)
{
if (isnull[i])
values[i] = (Datum) 0;
else
{
gistcentryinit(&giststate, i, &tmpentry, values[i],
NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpentry.key;
}
}
itup = index_form_tuple(giststate.tupdesc, values, isnull);
itup = gistFormTuple(&giststate, r,
values, NULL /* size is currently bogus */, isnull);
itup->t_tid = *ht_ctid;
gistdoinsert(r, itup, &giststate);
......@@ -937,7 +900,147 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
}
/*
* gistSplit -- split a page in the tree.
* simple split page
*/
/*
 * gistSplitHalf -- positional split, used when the keys give no basis for
 * choosing sides.
 *
 * Distributes offsets 1..len between the two halves of the split vector:
 * the first len/2 offsets go to v->spl_right and the remaining ones to
 * v->spl_left.  Both arrays are palloc'd here with room for len entries,
 * and spl_nleft / spl_nright are set to the number of entries stored.
 *
 * The boundary test is "i <= len / 2".  With a strict "<" the right side
 * received only len/2 - 1 offsets, which is zero for len <= 3, so such a
 * page was not split at all: every tuple stayed on the left side.
 */
static void
gistSplitHalf(GIST_SPLITVEC *v, int len)
{
	int			i;
	int			half = len / 2;

	v->spl_nright = v->spl_nleft = 0;
	v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
	v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));

	for (i = 1; i <= len; i++)
	{
		if (i <= half)
			v->spl_right[v->spl_nright++] = i;
		else
			v->spl_left[v->spl_nleft++] = i;
	}
}
/*
 * gistSplitByInvalid -- split a page that contains 'invalid' tuples.
 *
 * Invalid tuples need special processing: all of them are moved to the
 * right page and that side is marked not valid (spl_rightvalid = false).
 * The valid tuples go to the left page, and gistunionsubkeyvec() is called
 * on the left side's entries, starting from key 0.
 *
 * If every tuple is invalid, both sides are marked not valid and the page
 * is simply split half by half.
 *
 * If there is no place on the left page, gistSplit will be called one more
 * time for the left page.
 *
 * Normally this code is never executed, but after crash replay it is
 * possible to get 'invalid' tuples (the probability is low enough).
 *
 * The offset arrays stored into *v are palloc'd.  They must not point at
 * function-static storage: the result would then be overwritten by the
 * next call while the caller may still be using it.
 */
static void
gistSplitByInvalid(GISTSTATE *giststate, GIST_SPLITVEC *v, IndexTuple *itup, int len)
{
	int			i;
	int			nInvalid = 0;

	/* first pass: just count the invalid tuples */
	for (i = 1; i <= len; i++)
	{
		if (GistTupleIsInvalid(itup[i - 1]))
			nInvalid++;
	}

	if (nInvalid == len)
	{
		/* corner case, all tuples are invalid */
		v->spl_rightvalid = v->spl_leftvalid = false;
		gistSplitHalf(v, len);
	}
	else
	{
		GistSplitVec gsvp;

		v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
		v->spl_nright = 0;
		v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
		v->spl_nleft = 0;

		/* second pass: invalid tuples to the right, valid ones to the left */
		for (i = 1; i <= len; i++)
		{
			if (GistTupleIsInvalid(itup[i - 1]))
				v->spl_right[v->spl_nright++] = i;
			else
				v->spl_left[v->spl_nleft++] = i;
		}

		v->spl_rightvalid = false;
		v->spl_leftvalid = true;

		/* describe the left side of the split for gistunionsubkeyvec() */
		gsvp.idgrp = NULL;
		gsvp.attrsize = v->spl_lattrsize;
		gsvp.attr = v->spl_lattr;
		gsvp.len = v->spl_nleft;
		gsvp.entries = v->spl_left;
		gsvp.isnull = v->spl_lisnull;

		gistunionsubkeyvec(giststate, itup, &gsvp, 0);
	}
}
/*
 * gistSplitByKey -- try to split a page by the key in column attno
 * (zero-based); tuples whose key is NULL are moved to a separate page.
 *
 * For every tuple the attno-th attribute is fetched and handed to
 * gistdentryinit() to fill entryvec->vector[1..len]; the offsets of tuples
 * whose attribute is NULL are collected along the way.  Then:
 *
 *	- all keys are NULL: the column is marked NULL on both sides and the
 *	  split is retried on the next column; if this was the last column
 *	  the page is split half by half;
 *	- some keys are NULL: the NULL tuples go to the right page, all the
 *	  others to the left page, and gistunionsubkey() is called;
 *	- no key is NULL: the decision is left to gistUserPicksplit().
 *
 * On a non-leaf page, as soon as an invalid tuple is seen the whole job is
 * delegated to gistSplitByInvalid().
 *
 * The NULL-offset array is palloc'd because it may end up as v->spl_right.
 * It must not be function-static storage: this function calls itself, and
 * gistSplit is recursive as well, so a shared buffer could be overwritten
 * while an earlier result still refers to it.
 */
static void
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
			   GIST_SPLITVEC *v, GistEntryVector *entryvec, int attno)
{
	int			i;
	OffsetNumber *offNullTuples;
	int			nOffNullTuples = 0;

	offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));

	for (i = 1; i <= len; i++)
	{
		Datum		datum;
		bool		IsNull;

		if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
		{
			/* invalid tuples get their own kind of split */
			gistSplitByInvalid(giststate, v, itup, len);
			return;
		}

		datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
		gistdentryinit(giststate, attno, &(entryvec->vector[i]),
					   datum, r, page, i,
					   ATTSIZE(datum, giststate->tupdesc, attno + 1, IsNull),
					   FALSE, IsNull);
		if (IsNull)
			offNullTuples[nOffNullTuples++] = i;
	}

	v->spl_leftvalid = v->spl_rightvalid = true;

	if (nOffNullTuples == len)
	{
		/*
		 * Corner case: all keys in the attno column are NULL, so we should
		 * try to split by the keys in the next column.  If all keys in all
		 * columns are NULL, just split the page half by half.
		 */
		v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;

		if (attno + 1 == r->rd_att->natts)
			gistSplitHalf(v, len);
		else
			gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
	}
	else if (nOffNullTuples > 0)
	{
		int			j = 0;

		/*
		 * We don't want to mix NULL and not-NULL keys on one page, so move
		 * the NULLs to the right page.
		 */
		v->spl_right = offNullTuples;
		v->spl_nright = nOffNullTuples;
		v->spl_risnull[attno] = TRUE;

		v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
		v->spl_nleft = 0;

		/*
		 * offNullTuples is in ascending order, so a single cursor (j) is
		 * enough to skip the offsets that already went to the right page.
		 */
		for (i = 1; i <= len; i++)
		{
			if (j < v->spl_nright && offNullTuples[j] == i)
				j++;
			else
				v->spl_left[v->spl_nleft++] = i;
		}

		v->spl_idgrp = NULL;
		gistunionsubkey(giststate, itup, v, 0);
	}
	else
	{
		/*
		 * all keys are not-null
		 */
		gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate);
	}
}
/*
* gistSplit -- split a page in the tree and fill the struct
* used for XLOG and for the real buffer writes. The function is recursive,
* i.e. it keeps splitting pages until the keys fit on every page.
*/
SplitedPageLayout *
gistSplit(Relation r,
......@@ -951,77 +1054,14 @@ gistSplit(Relation r,
GIST_SPLITVEC v;
GistEntryVector *entryvec;
int i;
OffsetNumber offInvTuples[ MaxOffsetNumber ];
int nOffInvTuples = 0;
SplitedPageLayout *res = NULL;
/* generate the item array */
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
entryvec->n = len + 1;
for (i = 1; i <= len; i++)
{
Datum datum;
bool IsNull;
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
/* remember position of invalid tuple */
offInvTuples[ nOffInvTuples++ ] = i;
if ( nOffInvTuples > 0 )
/* we can safely do not decompress other keys, because
we will do splecial processing, but
it's needed to find another invalid tuples */
continue;
datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
gistdentryinit(giststate, 0, &(entryvec->vector[i]),
datum, r, page, i,
ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
FALSE, IsNull);
}
/*
* if it was invalid tuple then we need special processing.
* We move all invalid tuples on right page.
*
* if there is no place on left page, gistSplit will be called one more
* time for left page.
*
* Normally, we never exec this code, but after crash replay it's possible
* to get 'invalid' tuples (probability is low enough)
*/
if (nOffInvTuples > 0)
{
GistSplitVec gsvp;
v.spl_right = offInvTuples;
v.spl_nright = nOffInvTuples;
v.spl_rightvalid = false;
v.spl_left = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
v.spl_nleft = 0;
for(i = 1; i <= len; i++)
if ( !GistTupleIsInvalid(itup[i - 1]) )
v.spl_left[ v.spl_nleft++ ] = i;
v.spl_leftvalid = true;
gsvp.idgrp = NULL;
gsvp.attrsize = v.spl_lattrsize;
gsvp.attr = v.spl_lattr;
gsvp.len = v.spl_nleft;
gsvp.entries = v.spl_left;
gsvp.isnull = v.spl_lisnull;
gistunionsubkeyvec(giststate, itup, &gsvp, true);
}
else
{
/* there is no invalid tuples, so usial processing */
gistUserPicksplit(r, entryvec, &v, itup, len, giststate);
v.spl_leftvalid = v.spl_rightvalid = true;
}
gistSplitByKey(r, page, itup, len, giststate,
&v, entryvec, 0);
/* form left and right vector */
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.56 2006/03/05 15:58:20 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.57 2006/05/24 11:01:39 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -361,7 +361,7 @@ gistindex_keytest(IndexTuple tuple,
IncrIndexProcessed();
/*
* Tuple doesn't restore after crash recovery because of inclomplete
* Tuple doesn't restore after crash recovery because of incomplete
* insert
*/
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
......@@ -378,14 +378,15 @@ gistindex_keytest(IndexTuple tuple,
key->sk_attno,
giststate->tupdesc,
&isNull);
/* is the index entry NULL? */
if (isNull)
{
/* XXX eventually should check if SK_ISNULL */
if ( key->sk_flags & SK_ISNULL ) {
/* is the compared-to datum NULL? on non-leaf page it's possible
to have nulls in childs :( */
if ( isNull || !GistPageIsLeaf(p) )
return true;
return false;
}
/* is the compared-to datum NULL? */
if (key->sk_flags & SK_ISNULL)
} else if ( isNull )
return false;
gistdentryinit(giststate, key->sk_attno - 1, &de,
......
This diff is collapsed.
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.15 2006/05/19 16:15:17 teodor Exp $
* $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.16 2006/05/24 11:01:39 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -206,17 +206,6 @@ typedef struct
/* root page of a gist index */
#define GIST_ROOT_BLKNO 0
/*
* When we update a relation on which we're doing a scan, we need to
* check the scan and fix it if the update affected any of the pages
* it touches. Otherwise, we can miss records that we should see.
* The only times we need to do this are for deletions and splits. See
* the code in gistscan.c for how the scan is fixed. These two
* constants tell us what sort of operation changed the index.
*/
#define GISTOP_DEL 0
/* #define GISTOP_SPLIT 1 */
#define ATTSIZE(datum, tupdesc, i, isnull) \
( \
(isnull) ? 0 : \
......@@ -291,12 +280,6 @@ extern IndexTuple gistgetadjusted(Relation r,
IndexTuple oldtup,
IndexTuple addtup,
GISTSTATE *giststate);
extern int gistfindgroup(GISTSTATE *giststate,
GISTENTRY *valvec, GIST_SPLITVEC *spl);
extern void gistadjsubkey(Relation r,
IndexTuple *itup, int len,
GIST_SPLITVEC *v,
GISTSTATE *giststate);
extern IndexTuple gistFormTuple(GISTSTATE *giststate,
Relation r, Datum *attdata, int *datumsize, bool *isnull);
......@@ -321,13 +304,15 @@ typedef struct {
} GistSplitVec;
extern void gistunionsubkeyvec(GISTSTATE *giststate,
IndexTuple *itvec, GistSplitVec *gsvp, bool isall);
IndexTuple *itvec, GistSplitVec *gsvp, int startkey);
extern void gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec,
GIST_SPLITVEC *spl, int attno);
extern void GISTInitBuffer(Buffer b, uint32 f);
extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
Datum k, Relation r, Page pg, OffsetNumber o,
int b, bool l, bool isNull);
void gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
void gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v,
IndexTuple *itup, int len, GISTSTATE *giststate);
/* gistvacuum.c */
......
......@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.333 2006/05/19 19:08:26 alvherre Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.334 2006/05/24 11:01:39 teodor Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200605191
#define CATALOG_VERSION_NO 200605241
#endif
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.42 2006/05/02 22:25:10 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.43 2006/05/24 11:01:39 teodor Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
......@@ -114,7 +114,7 @@ DESCR("b-tree index access method");
DATA(insert OID = 405 ( hash 1 1 0 f f f f f t f hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate ));
DESCR("hash index access method");
#define HASH_AM_OID 405
DATA(insert OID = 783 ( gist 100 7 0 f t f f t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate ));
DATA(insert OID = 783 ( gist 100 7 0 f t t t t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
DATA(insert OID = 2742 ( gin 100 4 0 f f f f t t f gininsert ginbeginscan gingettuple gingetmulti ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment