* Add NULL support to GiST.

* Refactor and simplify code in gistutil.c and gist.c. * The user-defined picksplit method can now be called for a non-first index column in some cases, though there is room to do more here. * Small documentation fixes related to NULL support.

* Add NULL support to GiST.
* Refactor and simplify code in gistutil.c and gist.c. * The user-defined picksplit method can now be called for a non-first index column in some cases, though there is room to do more here. * Small documentation fixes related to NULL support.
d2158b02 · Teodor Sigaev · 86722057 · d2158b02 · d2158b02 · d2158b02
Commit d2158b02 authored May 24, 2006 by Teodor Sigaev
8 changed files
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
-<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.11 2006/05/10 23:18:38 tgl Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indexam.sgml,v 2.12 2006/05/24 11:01:39 teodor Exp $ -->
 <chapter id="indexam">
 <title>Index Access Method Interface Definition</title>
@@ -126,8 +126,7 @@
   used to scan for rows with <literal>a = 4</literal>, which is wrong if the
   index omits rows where <literal>b</> is null.
   It is, however, OK to omit rows where the first indexed column is null.
-   (GiST currently does so.)  Thus,
+   Thus, <structfield>amindexnulls</structfield> should be set true only if the
-   <structfield>amindexnulls</structfield> should be set true only if the
   index access method indexes all rows, including arbitrary combinations of
   null values.
  </para>

--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
-<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.56 2006/01/18 21:29:45 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/indices.sgml,v 1.57 2006/05/24 11:01:39 teodor Exp $ -->
 <chapter id="indexes">
 <title id="indexes-title">Indexes</title>
@@ -290,13 +290,13 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
  </para>
  <para>
-   A multicolumn GiST index can only be used when there is a query condition
+   A multicolumn GiST index can be used with query conditions that
-   on its leading column.  Conditions on additional columns restrict the
+   involve any subset of the index's columns. Conditions on additional 
-   entries returned by the index, but the condition on the first column is the
+   columns restrict the entries returned by the index, but the condition on 
-   most important one for determining how much of the index needs to be
+   the first column is the most important one for determining how much of 
-   scanned.  A GiST index will be relatively ineffective if its first column
+   the index needs to be scanned.  A GiST index will be relatively 
-   has only a few distinct values, even if there are many distinct values in
+   ineffective if its first column has only a few distinct values, even if 
-   additional columns.
+   there are many distinct values in additional columns.
  </para>
  <para>

--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.136 2006/05/19 16:15:17 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gist.c,v 1.137 2006/05/24 11:01:39 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -181,32 +181,13 @@ gistbuildCallback(Relation index,
 {
 	GISTBuildState *buildstate = (GISTBuildState *) state;
 	IndexTuple	itup;
-	GISTENTRY	tmpcentry;
-	int			i;
 	MemoryContext oldCtx;
-	/* GiST cannot index tuples with leading NULLs */
-	if (isnull[0])
-		return;
 	oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
-	/* immediately compress keys to normalize */
-	for (i = 0; i < buildstate->numindexattrs; i++)
-	{
-		if (isnull[i])
-			values[i] = (Datum) 0;
-		else
-		{
-			gistcentryinit(&buildstate->giststate, i, &tmpcentry, values[i],
-						   NULL, NULL, (OffsetNumber) 0,
-						   -1 /* size is currently bogus */ , TRUE, FALSE);
-			values[i] = tmpcentry.key;
-		}
-	}
 	/* form an index tuple and point it at the heap tuple */
-	itup = index_form_tuple(buildstate->giststate.tupdesc, values, isnull);
+	itup = gistFormTuple(&buildstate->giststate, index,
+		values, NULL /* size is currently bogus */, isnull);
 	itup->t_tid = htup->t_self;
 	/*
@@ -243,34 +224,16 @@ gistinsert(PG_FUNCTION_ARGS)
 #endif
 	IndexTuple	itup;
 	GISTSTATE	giststate;
-	GISTENTRY	tmpentry;
-	int			i;
 	MemoryContext oldCtx;
 	MemoryContext insertCtx;
-	/* GiST cannot index tuples with leading NULLs */
-	if (isnull[0])
-		PG_RETURN_BOOL(false);
 	insertCtx = createTempGistContext();
 	oldCtx = MemoryContextSwitchTo(insertCtx);
 	initGISTstate(&giststate, r);
-	/* immediately compress keys to normalize */
+	itup = gistFormTuple(&giststate, r,
-	for (i = 0; i < r->rd_att->natts; i++)
+		values, NULL /* size is currently bogus */, isnull);
-	{
-		if (isnull[i])
-			values[i] = (Datum) 0;
-		else
-		{
-			gistcentryinit(&giststate, i, &tmpentry, values[i],
-						   NULL, NULL, (OffsetNumber) 0,
-						   -1 /* size is currently bogus */ , TRUE, FALSE);
-			values[i] = tmpentry.key;
-		}
-	}
-	itup = index_form_tuple(giststate.tupdesc, values, isnull);
 	itup->t_tid = *ht_ctid;
 	gistdoinsert(r, itup, &giststate);
@@ -937,7 +900,147 @@ gistmakedeal(GISTInsertState *state, GISTSTATE *giststate)
 }
 /*
- *	gistSplit -- split a page in the tree.
+ * simple split page 
+ */
+static void
+gistSplitHalf(GIST_SPLITVEC *v, int len) {
+	/*
+	 * Fallback split used when the keys give no basis for a smarter
+	 * decision: distribute the 1-based tuple offsets 1..len between the
+	 * two sides by position only.
+	 *
+	 * v   - split vector to fill; spl_left/spl_right are freshly palloc'd
+	 *       here (each large enough for all len offsets) and
+	 *       spl_nleft/spl_nright are set to the number of offsets stored.
+	 * len - number of tuples being split.
+	 *
+	 * Offsets 1..len/2 go to the right side and the remainder to the left.
+	 * The comparison must be "<=": with "<" the right side would receive
+	 * only len/2 - 1 offsets, and none at all for len of 2 or 3, which
+	 * leaves every tuple on the left side and makes the split useless.
+	 */
+	int i;
+	v->spl_nright = 0;
+	v->spl_nleft = 0;
+	v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
+	v->spl_right = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
+	for (i = 1; i <= len; i++)
+	{
+		if (i <= len / 2)
+			v->spl_right[v->spl_nright++] = i;
+		else
+			v->spl_left[v->spl_nleft++] = i;
+	}
+}
+/*
+ * gistSplitByInvalid -- split a page that contains 'invalid' tuples.
+ *
+ * All invalid tuples (those for which GistTupleIsInvalid() is true) are
+ * moved to the right page and all remaining tuples to the left page.
+ * If every tuple is invalid, both sides are marked not valid and the
+ * tuples are simply divided by gistSplitHalf().
+ *
+ * If there is no room on the left page, gistSplit will be called one more
+ * time for the left page.
+ *
+ * Normally this code is never executed, but after crash replay it is
+ * possible to get 'invalid' tuples (the probability is low).
+ *
+ * giststate - passed through to gistunionsubkeyvec()
+ * v         - split vector to fill in
+ * itup      - array of len index tuples (0-based; offsets stored in v are
+ *             1-based)
+ * len       - number of tuples in itup
+ *
+ * NOTE: in the mixed case v->spl_right points at a function-static buffer,
+ * not palloc'd memory, so its contents are overwritten by the next call.
+ */
+static void
+gistSplitByInvalid(GISTSTATE *giststate, GIST_SPLITVEC *v, IndexTuple *itup, int len) {
+	int i;
+	static OffsetNumber offInvTuples[ MaxOffsetNumber ];	/* 1-based offsets of invalid tuples */
+	int			 nOffInvTuples = 0;
+	for (i = 1; i <= len; i++)
+		if ( GistTupleIsInvalid(itup[i - 1]) )
+			offInvTuples[ nOffInvTuples++ ] = i;
+	if ( nOffInvTuples == len ) {
+		/* corner case: all tuples are invalid, so neither side is valid */
+		v->spl_rightvalid= v->spl_leftvalid 	= false;
+		gistSplitHalf( v, len );
+	} else {
+		GistSplitVec    gsvp;
+		v->spl_right = offInvTuples;	/* aliases the static buffer above */
+		v->spl_nright = nOffInvTuples;
+		v->spl_rightvalid = false;
+		v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
+		v->spl_nleft = 0;
+		for(i = 1; i <= len; i++) 
+			if ( !GistTupleIsInvalid(itup[i - 1]) )
+				v->spl_left[ v->spl_nleft++ ] = i;
+		v->spl_leftvalid = true;
+		gsvp.idgrp = NULL;	/* describe only the left side to gistunionsubkeyvec */
+		gsvp.attrsize = v->spl_lattrsize;
+		gsvp.attr = v->spl_lattr;
+		gsvp.len = v->spl_nleft;
+		gsvp.entries = v->spl_left;
+		gsvp.isnull = v->spl_lisnull;
+		gistunionsubkeyvec(giststate, itup, &gsvp, 0);	/* last argument is startkey (per the prototype); presumably starts at the first column -- TODO confirm */
+	}
+}
+/*
+ * gistSplitByKey -- try to split a page by the key in column attno.
+ *
+ * attno is 0-based (index_getattr is passed attno+1).  Each tuple's key
+ * for that column is stored into entryvec->vector[i] by gistdentryinit(),
+ * with i being the tuple's 1-based offset, and the offsets of tuples
+ * whose key is NULL are remembered.  Then:
+ *
+ *	- if the page is not a leaf and an invalid tuple is found, the whole
+ *	  split is handed over to gistSplitByInvalid();
+ *	- if all keys in the column are NULL, the split is retried on the next
+ *	  column, or done by gistSplitHalf() when this was the last column;
+ *	- if only some keys are NULL, the NULLs are moved to a separate (right)
+ *	  page and the non-NULLs to the left page;
+ *	- if no key is NULL, gistUserPicksplit() decides.
+ *
+ * NOTE: in the mixed case v->spl_right points at a function-static buffer,
+ * not palloc'd memory, so its contents are overwritten by the next call.
+ */
+static void
+gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate, 
+		GIST_SPLITVEC *v, GistEntryVector *entryvec, int attno) {
+	int i;
+	static OffsetNumber offNullTuples[ MaxOffsetNumber ];	/* 1-based offsets of NULL keys, ascending */
+	int			 nOffNullTuples = 0;
+	for (i = 1; i <= len; i++) {
+		Datum       datum;
+		bool        IsNull;
+		if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1])) {
+			gistSplitByInvalid(giststate, v, itup, len);
+			return;
+		}
+		datum = index_getattr(itup[i - 1], attno+1, giststate->tupdesc, &IsNull);
+		gistdentryinit(giststate, attno, &(entryvec->vector[i]),
+					   datum, r, page, i,
+					   ATTSIZE(datum, giststate->tupdesc, attno+1, IsNull),
+					   FALSE, IsNull);
+		if ( IsNull )
+			offNullTuples[ nOffNullTuples++ ] = i;
+	}
+	v->spl_leftvalid = v->spl_rightvalid = true;
+	if ( nOffNullTuples == len ) {
+		/* 
+		 * Corner case: all keys in the attno column are NULL, so we should
+		 * try to split by the keys in the next column. If all keys in all
+		 * columns are NULL, just split the page half by half.
+		 */
+		v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
+		if ( attno+1 == r->rd_att->natts ) 
+			gistSplitHalf( v, len );
+		else 
+			gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno+1);
+	} else if ( nOffNullTuples > 0 ) {
+		int j=0;	/* cursor into offNullTuples */
+		/* 
+		 * We don't want to mix NULL and non-NULL keys
+		 * on one page, so move the NULLs to the right page.
+		 */
+		v->spl_right = offNullTuples;	/* aliases the static buffer above */
+		v->spl_nright = nOffNullTuples;
+		v->spl_risnull[attno] = TRUE;
+		v->spl_left = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
+		v->spl_nleft = 0;
+		for(i = 1; i <= len; i++) 
+			if ( j<v->spl_nright && offNullTuples[j] == i ) 
+				j++;	/* offset i is a NULL key: already on the right side */
+			else
+				v->spl_left[ v->spl_nleft++ ] = i;
+		v->spl_idgrp = NULL;
+		gistunionsubkey(giststate, itup, v, 0);	/* last argument is attno (per the prototype); presumably computes union keys from the first column -- TODO confirm */
+	} else {
+		/*
+		 * No key in this column is NULL: let the user-defined picksplit
+		 * method decide.
+		 */
+		gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate);
+	}
+}
+/*
+ * gistSplit -- split a page in the tree and fill struct
+ * used for XLOG and real writes buffers. Function is recursive, ie
+ * it will split page until keys will fit in every page.
 */
 SplitedPageLayout *
 gistSplit(Relation r,
@@ -951,77 +1054,14 @@ gistSplit(Relation r,
 	GIST_SPLITVEC v;
 	GistEntryVector *entryvec;
 	int			i;
-	OffsetNumber offInvTuples[ MaxOffsetNumber ];
-	int			 nOffInvTuples = 0;
 	SplitedPageLayout	*res = NULL;
 	/* generate the item array */
 	entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
 	entryvec->n = len + 1;
-	for (i = 1; i <= len; i++)
+	gistSplitByKey(r, page, itup, len, giststate, 
-	{
+		&v, entryvec, 0);
-		Datum		datum;
-		bool		IsNull;
-		if (!GistPageIsLeaf(page) && GistTupleIsInvalid(itup[i - 1]))
-			/* remember position of invalid tuple */
-			offInvTuples[ nOffInvTuples++ ] = i;			
-		if ( nOffInvTuples > 0 )
-			/* we can safely do not decompress other keys, because 
-			   we will do splecial processing, but
-			   it's needed to find another invalid tuples */
-			continue;	
-		datum = index_getattr(itup[i - 1], 1, giststate->tupdesc, &IsNull);
-		gistdentryinit(giststate, 0, &(entryvec->vector[i]),
-					   datum, r, page, i,
-					   ATTSIZE(datum, giststate->tupdesc, 1, IsNull),
-					   FALSE, IsNull);
-	}
-	/*
-	 * if it was invalid tuple then we need special processing.
-	 * We move all invalid tuples on right page. 
-	 *
-	 * if there is no place on left page, gistSplit will be called one more 
-	 * time for left page.
-	 *
-	 * Normally, we never exec this code, but after crash replay it's possible
-	 * to get 'invalid' tuples (probability is low enough)
-	 */
-	if (nOffInvTuples > 0)
-	{
-		GistSplitVec    gsvp;
-		v.spl_right = offInvTuples;
-		v.spl_nright = nOffInvTuples;
-		v.spl_rightvalid = false;
-		v.spl_left = (OffsetNumber *) palloc(entryvec->n * sizeof(OffsetNumber));
-		v.spl_nleft = 0;
-		for(i = 1; i <= len; i++) 
-			if ( !GistTupleIsInvalid(itup[i - 1]) )
-				v.spl_left[ v.spl_nleft++ ] = i;
-		v.spl_leftvalid = true;
-		gsvp.idgrp = NULL;
-		gsvp.attrsize = v.spl_lattrsize;
-		gsvp.attr = v.spl_lattr;
-		gsvp.len = v.spl_nleft;
-		gsvp.entries = v.spl_left;
-		gsvp.isnull = v.spl_lisnull;
-		gistunionsubkeyvec(giststate, itup, &gsvp, true);
-	}
-	else
-	{
-		/* there is no invalid tuples, so usial processing */
-		gistUserPicksplit(r, entryvec, &v, itup, len, giststate);
-		v.spl_leftvalid = v.spl_rightvalid = true;
-	}
 	/* form left and right vector */
 	lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));

--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.56 2006/03/05 15:58:20 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.57 2006/05/24 11:01:39 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -361,7 +361,7 @@ gistindex_keytest(IndexTuple tuple,
 	IncrIndexProcessed();
 	/*
-	 * Tuple doesn't restore after crash recovery because of inclomplete
+	 * Tuple doesn't restore after crash recovery because of incomplete
 	 * insert
 	 */
 	if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple))
@@ -378,14 +378,15 @@ gistindex_keytest(IndexTuple tuple,
 							  key->sk_attno,
 							  giststate->tupdesc,
 							  &isNull);
-		/* is the index entry NULL? */
-		if (isNull)
+		if ( key->sk_flags & SK_ISNULL ) {
-		{
+			/* is the compared-to datum NULL? on non-leaf page it's possible
-			/* XXX eventually should check if SK_ISNULL */
+			   to have nulls in childs :( */
+			if ( isNull || !GistPageIsLeaf(p) )
+				return true;
 			return false;
-		}
+		} else if ( isNull )
-		/* is the compared-to datum NULL? */
-		if (key->sk_flags & SK_ISNULL)
 			return false;
 		gistdentryinit(giststate, key->sk_attno - 1, &de,

--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.15 2006/05/19 16:15:17 teodor Exp $
+ * $PostgreSQL: pgsql/src/include/access/gist_private.h,v 1.16 2006/05/24 11:01:39 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -206,17 +206,6 @@ typedef struct
 /* root page of a gist index */
 #define GIST_ROOT_BLKNO				0
-/*
- * When we update a relation on which we're doing a scan, we need to
- * check the scan and fix it if the update affected any of the pages
- * it touches.	Otherwise, we can miss records that we should see.
- * The only times we need to do this are for deletions and splits. See
- * the code in gistscan.c for how the scan is fixed. These two
- * constants tell us what sort of operation changed the index.
- */
-#define GISTOP_DEL		0
-/* #define GISTOP_SPLIT 1 */
 #define ATTSIZE(datum, tupdesc, i, isnull) \
 		( \
 				(isnull) ? 0 : \
@@ -291,12 +280,6 @@ extern IndexTuple gistgetadjusted(Relation r,
 				IndexTuple oldtup,
 				IndexTuple addtup,
 				GISTSTATE *giststate);
-extern int gistfindgroup(GISTSTATE *giststate,
-			  GISTENTRY *valvec, GIST_SPLITVEC *spl);
-extern void gistadjsubkey(Relation r,
-			  IndexTuple *itup, int len,
-			  GIST_SPLITVEC *v,
-			  GISTSTATE *giststate);
 extern IndexTuple gistFormTuple(GISTSTATE *giststate,
 			  Relation r, Datum *attdata, int *datumsize, bool *isnull);
@@ -321,13 +304,15 @@ typedef struct {
 } GistSplitVec;
 extern void gistunionsubkeyvec(GISTSTATE *giststate, 
-	IndexTuple *itvec, GistSplitVec *gsvp,  bool isall);
+	IndexTuple *itvec, GistSplitVec *gsvp,  int startkey);
+extern void gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, 
+		GIST_SPLITVEC *spl, int attno);
 extern void GISTInitBuffer(Buffer b, uint32 f);
 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
 			   Datum k, Relation r, Page pg, OffsetNumber o,
 			   int b, bool l, bool isNull);
-void gistUserPicksplit(Relation r, GistEntryVector *entryvec, GIST_SPLITVEC *v,
+void gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GIST_SPLITVEC *v,
 				  IndexTuple *itup, int len, GISTSTATE *giststate);
 /* gistvacuum.c */

--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.333 2006/05/19 19:08:26 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.334 2006/05/24 11:01:39 teodor Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -53,6 +53,6 @@
 */
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200605191
+#define CATALOG_VERSION_NO	200605241
 #endif
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -8,7 +8,7 @@
 * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.42 2006/05/02 22:25:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.43 2006/05/24 11:01:39 teodor Exp $
 *
 * NOTES
 *		the genbki.sh script reads this file and generates .bki
@@ -114,7 +114,7 @@ DESCR("b-tree index access method");
 DATA(insert OID = 405 (  hash	1 1 0 f f f f f t f hashinsert hashbeginscan hashgettuple hashgetmulti hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate ));
 DESCR("hash index access method");
 #define HASH_AM_OID 405
-DATA(insert OID = 783 (  gist	100 7 0 f t f f t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate ));
+DATA(insert OID = 783 (  gist	100 7 0 f t t t t t t gistinsert gistbeginscan gistgettuple gistgetmulti gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate ));
 DESCR("GiST index access method");
 #define GIST_AM_OID 783
 DATA(insert OID = 2742 (  gin	100 4 0 f f f f t t f gininsert ginbeginscan gingettuple gingetmulti ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ));