Commit d2158b02 authored by Teodor Sigaev's avatar Teodor Sigaev

* Add support NULL to GiST.

* some refactoring and simplify code int gistutil.c and gist.c
* now in some cases it can be called used-defined
  picksplit method for non-first column in index, but here
	is a place to do more.
* small fix of docs related to support NULL.
parent 86722057
<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.11 2006/05/10 23:18:38 tgl Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.12 2006/05/24 11:01:39 teodor Exp $ -->
<chapter id="indexam"> <chapter id="indexam">
<title>Index Access Method Interface Definition</title> <title>Index Access Method Interface Definition</title>
...@@ -126,8 +126,7 @@ ...@@ -126,8 +126,7 @@
used to scan for rows with <literal>a = 4</literal>, which is wrong if the used to scan for rows with <literal>a = 4</literal>, which is wrong if the
index omits rows where <literal>b</> is null. index omits rows where <literal>b</> is null.
It is, however, OK to omit rows where the first indexed column is null. It is, however, OK to omit rows where the first indexed column is null.
(GiST currently does so.) Thus, Thus, <structfield>amindexnulls</structfield> should be set true only if the
<structfield>amindexnulls</structfield> should be set true only if the
index access method indexes all rows, including arbitrary combinations of index access method indexes all rows, including arbitrary combinations of
null values. null values.
</para> </para>
......
<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.56 2006/01/18 21:29:45 momjian Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.57 2006/05/24 11:01:39 teodor Exp $ -->
<chapter id="indexes"> <chapter id="indexes">
<title id="indexes-title">Indexes</title> <title id="indexes-title">Indexes</title>
...@@ -290,13 +290,13 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor); ...@@ -290,13 +290,13 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
</para> </para>
<para> <para>
A multicolumn GiST index can only be used when there is a query condition A multicolumn GiST index can be used with query conditions that
on its leading column. Conditions on additional columns restrict the involve any subset of the index's columns. Conditions on additional
entries returned by the index, but the condition on the first column is the columns restrict the entries returned by the index, but the condition on
most important one for determining how much of the index needs to be the first column is the most important one for determining how much of
scanned. A GiST index will be relatively ineffective if its first column the index needs to be scanned. A GiST index will be relatively
has only a few distinct values, even if there are many distinct values in ineffective if its first column has only a few distinct values, even if
additional columns. there are many distinct values in additional columns.
</para> </para>
<para> <para>
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.136 2006/05/19 16:15:17 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.137 2006/05/24 11:01:39 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -181,32 +181,13 @@ gistbuildCallback(Relation index, ...@@ -181,32 +181,13 @@ gistbuildCallback(Relation index,
{ {
GISTBuildState *buildstate = (GISTBuildState *) state; GISTBuildState *buildstate = (GISTBuildState *) state;
IndexTuple itup; IndexTuple itup;
GISTENTRY tmpcentry;
int i;
MemoryContext oldCtx; MemoryContext oldCtx;
/* GiST cannot index tuples with leading NULLs */
if (isnull[0])
return;
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
/* immediately compress keys to normalize */
for (i = 0; i < buildstate->numindexattrs; i++)
{
if (isnull[i])
values[i] = (Datum) 0;
else
{
gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpcentry.key;
}
}
/* form an index tuple and point it at the heap tuple */ /* form an index tuple and point it at the heap tuple */
itup = index_form_tuple(buildstate->giststate.tupdesc, values, isnull); itup = gistFormTuple(&buildstate->giststate, index,
values, NULL /* size is currently bogus */, isnull);
itup->t_tid = htup->t_self; itup->t_tid = htup->t_self;
/* /*
...@@ -243,34 +224,16 @@ gistinsert(PG_FUNCTION_ARGS) ...@@ -243,34 +224,16 @@ gistinsert(PG_FUNCTION_ARGS)
#endif #endif
IndexTuple itup; IndexTuple itup;
GISTSTATE giststate; GISTSTATE giststate;
GISTENTRY tmpentry;
int i;
MemoryContext oldCtx; MemoryContext oldCtx;
MemoryContext insertCtx; MemoryContext insertCtx;
/* GiST cannot index tuples with leading NULLs */
if (isnull[0])
PG_RETURN_BOOL(false);
insertCtx = createTempGistContext(); insertCtx = createTempGistContext();
oldCtx = MemoryContextSwitchTo(insertCtx); oldCtx = MemoryContextSwitchTo(insertCtx);
initGISTstate(&giststate, r); initGISTstate(&giststate, r);
/* immediately compress keys to normalize */ itup = gistFormTuple(&giststate, r,
for (i = 0; i < r->rd_att->natts; i++) values, NULL /* size is currently bogus */, isnull);
{
if (isnull[i])
values[i] = (Datum) 0;
else
{
gistcentryinit(&giststate, i, &tmpentry, values[i],
NULL, NULL, (OffsetNumber) 0,
-1 /* size is currently bogus */ , TRUE, FALSE);
values[i] = tmpentry.key;
}
}
itup = index_form_tuple(giststate.tupdesc, values, isnull);
itup->t_tid = *ht_ctid; itup->t_tid = *ht_ctid;
gistdoinsert(r, itup, &giststate); gistdoinsert(r, itup, &giststate);
...@@ -937,7 +900,147 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate) ...@@ -937,7 +900,147 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
} }
/* /*
* gistSplit -- split a page in the tree. * simple split page
*/
static void
gistSplitHalf(GIST_SPLITVEC *v, int len) {
int i;
v->spl_nright = v->spl_nleft = 0;
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
v->spl_right= (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
for(i = 1; i <= len; i++)
if ( i<len/2 )
v->spl_right[ v->spl_nright++ ] = i;
else
v->spl_left[ v->spl_nleft++ ] = i;
}
/*
* if it was invalid tuple then we need special processing.
* We move all invalid tuples on right page.
*
* if there is no place on left page, gistSplit will be called one more
* time for left page.
*
* Normally, we never exec this code, but after crash replay it's possible
* to get 'invalid' tuples (probability is low enough)
*/
static void
gistSplitByInvalid(GISTSTATE *giststate, GIST_SPLITVEC *v, IndexTuple *itup, int len) {
int i;
static OffsetNumber offInvTuples[ MaxOffsetNumber ];
int nOffInvTuples = 0;
for (i = 1; i <= len; i++)
if ( GistTupleIsInvalid(itup[i - 1]) )
offInvTuples[ nOffInvTuples++ ] = i;
if ( nOffInvTuples == len ) {
/* corner case, all tuples are invalid */
v->spl_rightvalid= v->spl_leftvalid = false;
gistSplitHalf( v, len );
} else {
GistSplitVec gsvp;
v->spl_right = offInvTuples;
v->spl_nright = nOffInvTuples;
v->spl_rightvalid = false;
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
v->spl_nleft = 0;
for(i = 1; i <= len; i++)
if ( !GistTupleIsInvalid(itup[i - 1]) )
v->spl_left[ v->spl_nleft++ ] = i;
v->spl_leftvalid = true;
gsvp.idgrp = NULL;
gsvp.attrsize = v->spl_lattrsize;
gsvp.attr = v->spl_lattr;
gsvp.len = v->spl_nleft;
gsvp.entries = v->spl_left;
gsvp.isnull = v->spl_lisnull;
gistunionsubkeyvec(giststate, itup, &gsvp, 0);
}
}
/*
* trys to split page by attno key, in a case of null
* values move its to separate page.
*/
static void
gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
GIST_SPLITVEC *v, GistEntryVector *entryvec, int attno) {
int i;
static OffsetNumber offNullTuples[ MaxOffsetNumber ];
int nOffNullTuples = 0;
for (i = 1; i <= len; i++) {
Datum datum;
bool IsNull;
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1])) {
gistSplitByInvalid(giststate, v, itup, len);
return;
}
datum = index_getattr(itup[i - 1], attno+1, giststate->tupdesc, &IsNull);
gistdentryinit(giststate, attno, &(entryvec->vector[i]),
datum, r, page, i,
ATTSIZE(datum, giststate->tupdesc, attno+1, IsNull),
FALSE, IsNull);
if ( IsNull )
offNullTuples[ nOffNullTuples++ ] = i;
}
v->spl_leftvalid = v->spl_rightvalid = true;
if ( nOffNullTuples == len ) {
/*
* Corner case: All keys in attno column are null, we should try to
* by keys in next column. It all keys in all columns
* are NULL just split page half by half
*/
v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
if ( attno+1 == r->rd_att->natts )
gistSplitHalf( v, len );
else
gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
} else if ( nOffNullTuples > 0 ) {
int j=0;
/*
* We don't want to mix NULLs and not-NULLs keys
* on one page, so move nulls to right page
*/
v->spl_right = offNullTuples;
v->spl_nright = nOffNullTuples;
v->spl_risnull[attno] = TRUE;
v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
v->spl_nleft = 0;
for(i = 1; i <= len; i++)
if ( j<v->spl_nright && offNullTuples[j] == i )
j++;
else
v->spl_left[ v->spl_nleft++ ] = i;
v->spl_idgrp = NULL;
gistunionsubkey(giststate, itup, v, 0);
} else {
/*
* all keys are not-null
*/
gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate);
}
}
/*
* gistSplit -- split a page in the tree and fill struct
* used for XLOG and real writes buffers. Function is recursive, ie
* it will split page until keys will fit in every page.
*/ */
SplitedPageLayout * SplitedPageLayout *
gistSplit(Relation r, gistSplit(Relation r,
...@@ -951,77 +1054,14 @@ gistSplit(Relation r, ...@@ -951,77 +1054,14 @@ gistSplit(Relation r,
GIST_SPLITVEC v; GIST_SPLITVEC v;
GistEntryVector *entryvec; GistEntryVector *entryvec;
int i; int i;
OffsetNumber offInvTuples[ MaxOffsetNumber ];
int nOffInvTuples = 0;
SplitedPageLayout *res = NULL; SplitedPageLayout *res = NULL;
/* generate the item array */ /* generate the item array */
entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY)); entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
entryvec->n = len + 1; entryvec->n = len + 1;
for (i = 1; i <= len; i++) gistSplitByKey(r, page, itup, len, giststate,
{ &v, entryvec, 0);
Datum datum;
bool IsNull;
if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
/* remember position of invalid tuple */
offInvTuples[ nOffInvTuples++ ] = i;
if ( nOffInvTuples > 0 )
/* we can safely do not decompress other keys, because
we will do splecial processing, but
it's needed to find another invalid tuples */
continue;
datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
gistdentryinit(giststate, 0, &(entryvec->vector[i]),
datum, r, page, i,
ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
FALSE, IsNull);
}
/*
* if it was invalid tuple then we need special processing.
* We move all invalid tuples on right page.
*
* if there is no place on left page, gistSplit will be called one more
* time for left page.
*
* Normally, we never exec this code, but after crash replay it's possible
* to get 'invalid' tuples (probability is low enough)
*/
if (nOffInvTuples > 0)
{
GistSplitVec gsvp;
v.spl_right = offInvTuples;
v.spl_nright = nOffInvTuples;
v.spl_rightvalid = false;
v.spl_left = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
v.spl_nleft = 0;
for(i = 1; i <= len; i++)
if ( !GistTupleIsInvalid(itup[i - 1]) )
v.spl_left[ v.spl_nleft++ ] = i;
v.spl_leftvalid = true;
gsvp.idgrp = NULL;
gsvp.attrsize = v.spl_lattrsize;
gsvp.attr = v.spl_lattr;
gsvp.len = v.spl_nleft;
gsvp.entries = v.spl_left;
gsvp.isnull = v.spl_lisnull;
gistunionsubkeyvec(giststate, itup, &gsvp, true);
}
else
{
/* there is no invalid tuples, so usial processing */
gistUserPicksplit(r, entryvec, &v, itup, len, giststate);
v.spl_leftvalid = v.spl_rightvalid = true;
}
/* form left and right vector */ /* form left and right vector */
lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1)); lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.56 2006/03/05 15:58:20 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.57 2006/05/24 11:01:39 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -361,7 +361,7 @@ gistindex_keytest(IndexTuple tuple, ...@@ -361,7 +361,7 @@ gistindex_keytest(IndexTuple tuple,
IncrIndexProcessed(); IncrIndexProcessed();
/* /*
* Tuple doesn't restore after crash recovery because of inclomplete * Tuple doesn't restore after crash recovery because of incomplete
* insert * insert
*/ */
if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple)) if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
...@@ -378,14 +378,15 @@ gistindex_keytest(IndexTuple tuple, ...@@ -378,14 +378,15 @@ gistindex_keytest(IndexTuple tuple,
key->sk_attno, key->sk_attno,
giststate->tupdesc, giststate->tupdesc,
&isNull); &isNull);
/* is the index entry NULL? */
if (isNull) if ( key->sk_flags & SK_ISNULL ) {
{ /* is the compared-to datum NULL? on non-leaf page it's possible
/* XXX eventually should check if SK_ISNULL */ to have nulls in childs :( */
if ( isNull || !GistPageIsLeaf(p) )
return true;
return false; return false;
} } else if ( isNull )
/* is the compared-to datum NULL? */
if (key->sk_flags & SK_ISNULL)
return false; return false;
gistdentryinit(giststate, key->sk_attno - 1, &de, gistdentryinit(giststate, key->sk_attno - 1, &de,
......
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.15 2006/05/19 16:15:17 teodor Exp $ * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.16 2006/05/24 11:01:39 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -206,17 +206,6 @@ typedef struct ...@@ -206,17 +206,6 @@ typedef struct
/* root page of a gist index */ /* root page of a gist index */
#define GIST_ROOT_BLKNO 0 #define GIST_ROOT_BLKNO 0
/*
* When we update a relation on which we're doing a scan, we need to
* check the scan and fix it if the update affected any of the pages
* it touches. Otherwise, we can miss records that we should see.
* The only times we need to do this are for deletions and splits. See
* the code in gistscan.c for how the scan is fixed. These two
* constants tell us what sort of operation changed the index.
*/
#define GISTOP_DEL 0
/* #define GISTOP_SPLIT 1 */
#define ATTSIZE(datum, tupdesc, i, isnull) \ #define ATTSIZE(datum, tupdesc, i, isnull) \
( \ ( \
(isnull) ? 0 : \ (isnull) ? 0 : \
...@@ -291,12 +280,6 @@ extern IndexTuple gistgetadjusted(Relation r, ...@@ -291,12 +280,6 @@ extern IndexTuple gistgetadjusted(Relation r,
IndexTuple oldtup, IndexTuple oldtup,
IndexTuple addtup, IndexTuple addtup,
GISTSTATE *giststate); GISTSTATE *giststate);
extern int gistfindgroup(GISTSTATE *giststate,
GISTENTRY *valvec, GIST_SPLITVEC *spl);
extern void gistadjsubkey(Relation r,
IndexTuple *itup, int len,
GIST_SPLITVEC *v,
GISTSTATE *giststate);
extern IndexTuple gistFormTuple(GISTSTATE *giststate, extern IndexTuple gistFormTuple(GISTSTATE *giststate,
Relation r, Datum *attdata, int *datumsize, bool *isnull); Relation r, Datum *attdata, int *datumsize, bool *isnull);
...@@ -321,13 +304,15 @@ typedef struct { ...@@ -321,13 +304,15 @@ typedef struct {
} GistSplitVec; } GistSplitVec;
extern void gistunionsubkeyvec(GISTSTATE *giststate, extern void gistunionsubkeyvec(GISTSTATE *giststate,
IndexTuple *itvec, GistSplitVec *gsvp, bool isall); IndexTuple *itvec, GistSplitVec *gsvp, int startkey);
extern void gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec,
GIST_SPLITVEC *spl, int attno);
extern void GISTInitBuffer(Buffer b, uint32 f); extern void GISTInitBuffer(Buffer b, uint32 f);
extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e, extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
Datum k, Relation r, Page pg, OffsetNumber o, Datum k, Relation r, Page pg, OffsetNumber o,
int b, bool l, bool isNull); int b, bool l, bool isNull);
void gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v, void gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v,
IndexTuple *itup, int len, GISTSTATE *giststate); IndexTuple *itup, int len, GISTSTATE *giststate);
/* gistvacuum.c */ /* gistvacuum.c */
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.333 2006/05/19 19:08:26 alvherre Exp $ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.334 2006/05/24 11:01:39 teodor Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 200605191 #define CATALOG_VERSION_NO 200605241
#endif #endif
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.42 2006/05/02 22:25:10 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.43 2006/05/24 11:01:39 teodor Exp $
* *
* NOTES * NOTES
* the genbki.sh script reads this file and generates .bki * the genbki.sh script reads this file and generates .bki
...@@ -114,7 +114,7 @@ DESCR("b-tree index access method"); ...@@ -114,7 +114,7 @@ DESCR("b-tree index access method");
DATA(insert OID = 405 ( hash 1 1 0 f f f f f t f hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate )); DATA(insert OID = 405 ( hash 1 1 0 f f f f f t f hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate ));
DESCR("hash index access method"); DESCR("hash index access method");
#define HASH_AM_OID 405 #define HASH_AM_OID 405
DATA(insert OID = 783 ( gist 100 7 0 f t f f t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate )); DATA(insert OID = 783 ( gist 100 7 0 f t t t t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate ));
DESCR("GiST index access method"); DESCR("GiST index access method");
#define GIST_AM_OID 783 #define GIST_AM_OID 783
DATA(insert OID = 2742 ( gin 100 4 0 f f f f t t f gininsert ginbeginscan gingettuple gingetmulti ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate )); DATA(insert OID = 2742 ( gin 100 4 0 f f f f t t f gininsert ginbeginscan gingettuple gingetmulti ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment