Commit 48c7d9f6 authored by Tom Lane

Improve GIN indexscan cost estimation.

The better estimate requires more statistics than we previously stored:
in particular, counts of "entry" versus "data" pages within the index,
as well as knowledge of the number of distinct key values.  We collect
this information during initial index build and update it during VACUUM,
storing the info in new fields on the index metapage.  No initdb is
required because these fields will read as zeroes in a pre-existing
index, and the new gincostestimate code is coded to behave (reasonably)
sanely if they are zeroes.

Teodor Sigaev, reviewed by Jan Urbanski, Tom Lane, and Itagaki Takahiro.
parent cd0e8253
......@@ -268,10 +268,13 @@ findParents(GinBtree btree, GinBtreeStack *stack,
/*
* Insert value (stored in GinBtree) to tree described by stack
*
* During an index build, buildStats is non-null and the counters
* it contains should be incremented as needed.
*
* NB: the passed-in stack is freed, as though by freeGinBtreeStack.
*/
void
ginInsertValue(GinBtree btree, GinBtreeStack *stack)
ginInsertValue(GinBtree btree, GinBtreeStack *stack, GinStatsData *buildStats)
{
GinBtreeStack *parent = stack;
BlockNumber rootBlkno = InvalidBuffer;
......@@ -330,6 +333,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
/* During index build, count the newly-split page */
if (buildStats)
{
if (btree->isData)
buildStats->nDataPages++;
else
buildStats->nEntryPages++;
}
parent = stack->parent;
if (parent == NULL)
......@@ -381,6 +393,15 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
freeGinBtreeStack(stack);
/* During index build, count the newly-added root page */
if (buildStats)
{
if (btree->isData)
buildStats->nDataPages++;
else
buildStats->nEntryPages++;
}
return;
}
else
......
......@@ -592,9 +592,11 @@ void
prepareDataScan(GinBtree btree, Relation index)
{
memset(btree, 0, sizeof(GinBtreeData));
btree->index = index;
btree->isMoveRight = dataIsMoveRight;
btree->findChildPage = dataLocateItem;
btree->isMoveRight = dataIsMoveRight;
btree->findItem = dataLocateLeafItem;
btree->findChildPtr = dataFindChildPtr;
btree->getLeftMostPage = dataGetLeftMostPage;
......@@ -603,6 +605,7 @@ prepareDataScan(GinBtree btree, Relation index)
btree->splitPage = dataSplitPage;
btree->fillRoot = dataFillRoot;
btree->isData = TRUE;
btree->searchMode = FALSE;
btree->isDelete = FALSE;
btree->fullScan = FALSE;
......@@ -628,7 +631,9 @@ prepareScanPostingTree(Relation index, BlockNumber rootBlkno, bool searchMode)
* Inserts an array of item pointers; may execute several tree scans (very rare)
*/
void
insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
ginInsertItemPointer(GinPostingTreeScan *gdi,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
{
BlockNumber rootBlkno = gdi->stack->blkno;
......@@ -653,7 +658,7 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
freeGinBtreeStack(gdi->stack);
}
else
ginInsertValue(&(gdi->btree), gdi->stack);
ginInsertValue(&(gdi->btree), gdi->stack, buildStats);
gdi->stack = NULL;
}
......
......@@ -659,8 +659,11 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu
{
memset(btree, 0, sizeof(GinBtreeData));
btree->isMoveRight = entryIsMoveRight;
btree->index = index;
btree->ginstate = ginstate;
btree->findChildPage = entryLocateEntry;
btree->isMoveRight = entryIsMoveRight;
btree->findItem = entryLocateLeafEntry;
btree->findChildPtr = entryFindChildPtr;
btree->getLeftMostPage = entryGetLeftMostPage;
......@@ -669,13 +672,12 @@ prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum valu
btree->splitPage = entrySplitPage;
btree->fillRoot = entryFillRoot;
btree->index = index;
btree->ginstate = ginstate;
btree->entryAttnum = attnum;
btree->entryValue = value;
btree->isDelete = FALSE;
btree->isData = FALSE;
btree->searchMode = FALSE;
btree->fullScan = FALSE;
btree->isBuild = FALSE;
btree->entryAttnum = attnum;
btree->entryValue = value;
btree->isDelete = FALSE;
}
......@@ -789,7 +789,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
ginBeginBAScan(&accum);
while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
{
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL);
if (vac_delay)
vacuum_delay_point();
}
......@@ -823,7 +823,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
ginBeginBAScan(&accum);
while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
ginEntryInsert(index, ginstate, attnum, entry, list, nlist, NULL);
}
/*
......
......@@ -27,6 +27,7 @@ typedef struct
{
GinState ginstate;
double indtuples;
GinStatsData buildStats;
MemoryContext tmpCtx;
MemoryContext funcCtx;
BuildAccumulator accum;
......@@ -97,8 +98,10 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems)
* GinFormTuple().
*/
static IndexTuple
addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
IndexTuple old, ItemPointerData *items, uint32 nitem, bool isBuild)
addItemPointersToTuple(Relation index, GinState *ginstate,
GinBtreeStack *stack, IndexTuple old,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats)
{
Datum key = gin_index_getattr(ginstate, old);
OffsetNumber attnum = gintuple_get_attrnum(ginstate, old);
......@@ -128,11 +131,15 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
GinSetPostingTree(res, postingRoot);
gdi = prepareScanPostingTree(index, postingRoot, FALSE);
gdi->btree.isBuild = isBuild;
gdi->btree.isBuild = (buildStats != NULL);
insertItemPointer(gdi, items, nitem);
ginInsertItemPointer(gdi, items, nitem, buildStats);
pfree(gdi);
/* During index build, count the newly-added data page */
if (buildStats)
buildStats->nDataPages++;
}
return res;
......@@ -140,18 +147,25 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*
* During an index build, buildStats is non-null and the counters
* it contains should be incremented as needed.
*/
void
ginEntryInsert(Relation index, GinState *ginstate,
OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem,
bool isBuild)
GinStatsData *buildStats)
{
GinBtreeData btree;
GinBtreeStack *stack;
IndexTuple itup;
Page page;
/* During index build, count the to-be-inserted entry */
if (buildStats)
buildStats->nEntries++;
prepareEntryScan(&btree, index, attnum, value, ginstate);
stack = ginFindLeafPage(&btree, NULL);
......@@ -174,14 +188,15 @@ ginEntryInsert(Relation index, GinState *ginstate,
/* insert into posting tree */
gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
gdi->btree.isBuild = isBuild;
insertItemPointer(gdi, items, nitem);
gdi->btree.isBuild = (buildStats != NULL);
ginInsertItemPointer(gdi, items, nitem, buildStats);
pfree(gdi);
return;
}
itup = addItemPointersToTuple(index, ginstate, stack, itup, items, nitem, isBuild);
itup = addItemPointersToTuple(index, ginstate, stack, itup,
items, nitem, buildStats);
btree.isDelete = TRUE;
}
......@@ -195,13 +210,14 @@ ginEntryInsert(Relation index, GinState *ginstate,
/* Add the rest, making a posting tree if necessary */
IndexTuple previtup = itup;
itup = addItemPointersToTuple(index, ginstate, stack, previtup, items + 1, nitem - 1, isBuild);
itup = addItemPointersToTuple(index, ginstate, stack, previtup,
items + 1, nitem - 1, buildStats);
pfree(previtup);
}
}
btree.entry = itup;
ginInsertValue(&btree, stack);
ginInsertValue(&btree, stack, buildStats);
pfree(itup);
}
......@@ -260,7 +276,8 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
{
/* there could be many entries, so be willing to abort here */
CHECK_FOR_INTERRUPTS();
ginEntryInsert(index, &buildstate->ginstate, attnum, entry, list, nlist, TRUE);
ginEntryInsert(index, &buildstate->ginstate, attnum, entry,
list, nlist, &buildstate->buildStats);
}
MemoryContextReset(buildstate->tmpCtx);
......@@ -292,6 +309,8 @@ ginbuild(PG_FUNCTION_ARGS)
RelationGetRelationName(index));
initGinState(&buildstate.ginstate, index);
buildstate.indtuples = 0;
memset(&buildstate.buildStats, 0, sizeof(GinStatsData));
/* initialize the meta page */
MetaBuffer = GinNewBuffer(index);
......@@ -331,8 +350,8 @@ ginbuild(PG_FUNCTION_ARGS)
UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* build the index */
buildstate.indtuples = 0;
/* count the root as first entry page */
buildstate.buildStats.nEntryPages++;
/*
* create a temporary memory context that is reset once for each tuple
......@@ -367,12 +386,19 @@ ginbuild(PG_FUNCTION_ARGS)
{
/* there could be many entries, so be willing to abort here */
CHECK_FOR_INTERRUPTS();
ginEntryInsert(index, &buildstate.ginstate, attnum, entry, list, nlist, TRUE);
ginEntryInsert(index, &buildstate.ginstate, attnum, entry,
list, nlist, &buildstate.buildStats);
}
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(buildstate.tmpCtx);
/*
* Update metapage stats
*/
buildstate.buildStats.nTotalPages = RelationGetNumberOfBlocks(index);
ginUpdateStats(index, &buildstate.buildStats);
/*
* Return statistics
*/
......@@ -401,7 +427,7 @@ ginHeapTupleInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datu
return 0;
for (i = 0; i < nentries; i++)
ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, FALSE);
ginEntryInsert(index, ginstate, attnum, entries[i], item, 1, NULL);
return nentries;
}
......
......@@ -13,10 +13,12 @@
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/gin.h"
#include "access/reloptions.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/indexfsm.h"
......@@ -227,6 +229,10 @@ GinInitMetabuffer(Buffer b)
metadata->tailFreeSize = 0;
metadata->nPendingPages = 0;
metadata->nPendingHeapTuples = 0;
metadata->nTotalPages = 0;
metadata->nEntryPages = 0;
metadata->nDataPages = 0;
metadata->nEntries = 0;
}
int
......@@ -354,3 +360,82 @@ ginoptions(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(rdopts);
}
/*
 * ginGetStats -- copy the statistics kept on the index's metapage into *stats
 *
 * The metapage is read under a share lock, which is released again before
 * returning.
 *
 * Note: of the returned fields only nPendingPages can be trusted to be
 * up-to-date; the other counters are as of the last VACUUM (or the initial
 * index build).
 */
void
ginGetStats(Relation index, GinStatsData *stats)
{
	Buffer		metabuf;
	Page		page;
	GinMetaPageData *meta;

	/* Read and share-lock the metapage */
	metabuf = ReadBuffer(index, GIN_METAPAGE_BLKNO);
	LockBuffer(metabuf, GIN_SHARE);

	page = BufferGetPage(metabuf);
	meta = GinPageGetMeta(page);

	/* Counters maintained by index build and VACUUM */
	stats->nTotalPages = meta->nTotalPages;
	stats->nEntryPages = meta->nEntryPages;
	stats->nDataPages = meta->nDataPages;
	stats->nEntries = meta->nEntries;

	/* Size of the pending list */
	stats->nPendingPages = meta->nPendingPages;

	UnlockReleaseBuffer(metabuf);
}
/*
 * ginUpdateStats -- store the given statistics in the index's metapage
 *
 * Overwrites nTotalPages, nEntryPages, nDataPages and nEntries on the
 * metapage while holding an exclusive buffer lock.  Unless the index is
 * temporary, an XLOG_GIN_UPDATE_META_PAGE record carrying a copy of the
 * updated metapage data is written as well.
 *
 * Note: stats->nPendingPages is *not* copied over; the metapage's own value
 * is left untouched.
 */
void
ginUpdateStats(Relation index, const GinStatsData *stats)
{
	Buffer		metabuf;
	Page		page;
	GinMetaPageData *meta;

	metabuf = ReadBuffer(index, GIN_METAPAGE_BLKNO);
	LockBuffer(metabuf, GIN_EXCLUSIVE);
	page = BufferGetPage(metabuf);
	meta = GinPageGetMeta(page);

	START_CRIT_SECTION();

	/* Install the new counters; nPendingPages is deliberately skipped */
	meta->nTotalPages = stats->nTotalPages;
	meta->nEntryPages = stats->nEntryPages;
	meta->nDataPages = stats->nDataPages;
	meta->nEntries = stats->nEntries;

	MarkBufferDirty(metabuf);

	if (!index->rd_istemp)
	{
		ginxlogUpdateMeta xlrec;
		XLogRecData rdata;
		XLogRecPtr	recptr;

		/* Fill the record: no tuples, both block links invalid */
		xlrec.node = index->rd_node;
		xlrec.ntuples = 0;
		xlrec.prevTail = InvalidBlockNumber;
		xlrec.newRightlink = InvalidBlockNumber;
		/* memcpy keeps the logged image byte-for-byte equal to the page data */
		memcpy(&xlrec.metadata, meta, sizeof(GinMetaPageData));

		/* Single-element chain, not tied to any buffer */
		rdata.buffer = InvalidBuffer;
		rdata.data = (char *) &xlrec;
		rdata.len = sizeof(ginxlogUpdateMeta);
		rdata.next = NULL;

		recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	UnlockReleaseBuffer(metabuf);

	END_CRIT_SECTION();
}
......@@ -707,9 +707,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
BlockNumber npages,
blkno;
BlockNumber totFreePages;
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
GinState ginstate;
GinStatsData idxStat;
/*
* In an autovacuum analyze, we want to clean up pending insertions.
......@@ -736,6 +735,8 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
ginInsertCleanup(index, &ginstate, true, stats);
}
memset(&idxStat, 0, sizeof(idxStat));
/*
* XXX we always report the heap tuple count as the number of index
* entries. This is bogus if the index is partial, but it's real hard to
......@@ -757,7 +758,7 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
totFreePages = 0;
for (blkno = GIN_ROOT_BLKNO + 1; blkno < npages; blkno++)
for (blkno = GIN_ROOT_BLKNO; blkno < npages; blkno++)
{
Buffer buffer;
Page page;
......@@ -771,15 +772,28 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
if (GinPageIsDeleted(page))
{
Assert(blkno != GIN_ROOT_BLKNO);
RecordFreeIndexPage(index, blkno);
totFreePages++;
}
else
lastFilledBlock = blkno;
else if (GinPageIsData(page))
{
idxStat.nDataPages++;
}
else if (!GinPageIsList(page))
{
idxStat.nEntryPages++;
if ( GinPageIsLeaf(page) )
idxStat.nEntries += PageGetMaxOffsetNumber(page);
}
UnlockReleaseBuffer(buffer);
}
lastBlock = npages - 1;
/* Update the metapage with accurate page and entry counts */
idxStat.nTotalPages = npages;
ginUpdateStats(info->index, &idxStat);
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
......
......@@ -839,7 +839,7 @@ ginContinueSplit(ginIncompleteSplit *split)
stack.parent = NULL;
findParents(&btree, &stack, split->rootBlkno);
ginInsertValue(&btree, stack.parent);
ginInsertValue(&btree, stack.parent, NULL);
FreeFakeRelcacheEntry(reln);
......
This diff is collapsed.
......@@ -79,6 +79,14 @@ typedef struct GinMetaPageData
*/
BlockNumber nPendingPages;
int64 nPendingHeapTuples;
/*
* Statistics for planner use (accurate as of last VACUUM)
*/
BlockNumber nTotalPages;
BlockNumber nEntryPages;
BlockNumber nDataPages;
int64 nEntries;
} GinMetaPageData;
#define GinPageGetMeta(p) \
......@@ -94,6 +102,8 @@ typedef struct GinMetaPageData
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
#define GinPageIsList(page) ( GinPageGetOpaque(page)->flags & GIN_LIST )
#define GinPageSetList(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST )
#define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
......@@ -362,13 +372,28 @@ extern Datum *extractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum va
extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
/*
 * GinStatsData represents stats data for planner use
 *
 * ginGetStats() fills all fields from the index metapage; ginUpdateStats()
 * writes every field except nPendingPages back to the metapage.  During an
 * index build a pointer to one of these is passed down the insertion code
 * so the page and entry counters can be incremented as pages are created.
 */
typedef struct GinStatsData
{
/* pending-list page count copied from the metapage; never written back */
BlockNumber nPendingPages;
/* total number of blocks in the index relation */
BlockNumber nTotalPages;
/* number of entry-tree pages */
BlockNumber nEntryPages;
/* number of data (posting-tree) pages */
BlockNumber nDataPages;
/* number of index entries (key values) */
int64 nEntries;
} GinStatsData;
extern void ginGetStats(Relation index, GinStatsData *stats);
extern void ginUpdateStats(Relation index, const GinStatsData *stats);
/* gininsert.c */
extern Datum ginbuild(PG_FUNCTION_ARGS);
extern Datum gininsert(PG_FUNCTION_ARGS);
extern void ginEntryInsert(Relation index, GinState *ginstate,
OffsetNumber attnum, Datum value,
ItemPointerData *items, uint32 nitem,
bool isBuild);
GinStatsData *buildStats);
/* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
......@@ -406,6 +431,7 @@ typedef struct GinBtreeData
Page (*splitPage) (GinBtree, Buffer, Buffer, OffsetNumber, XLogRecData **);
void (*fillRoot) (GinBtree, Buffer, Buffer, Buffer);
bool isData;
bool searchMode;
Relation index;
......@@ -432,7 +458,8 @@ typedef struct GinBtreeData
extern GinBtreeStack *ginPrepareFindLeafPage(GinBtree btree, BlockNumber blkno);
extern GinBtreeStack *ginFindLeafPage(GinBtree btree, GinBtreeStack *stack);
extern void freeGinBtreeStack(GinBtreeStack *stack);
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack);
extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
GinStatsData *buildStats);
extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
/* ginentrypage.c */
......@@ -462,8 +489,9 @@ typedef struct
extern GinPostingTreeScan *prepareScanPostingTree(Relation index,
BlockNumber rootBlkno, bool searchMode);
extern void insertItemPointer(GinPostingTreeScan *gdi,
ItemPointerData *items, uint32 nitem);
extern void ginInsertItemPointer(GinPostingTreeScan *gdi,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats);
extern Buffer scanBeginPostingTree(GinPostingTreeScan *gdi);
extern void dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf);
extern void prepareDataScan(GinBtree btree, Relation index);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment