Commit e20c70cb authored by Heikki Linnakangas

Allow skipping some items in a multi-key GIN search.

In a multi-key search, i.e. something like "col @> 'foo' AND col @> 'bar'",
as soon as we find the next item that matches the first criterion, we don't
need to check the second criterion for TIDs smaller than the first match. That
saves a lot of effort, especially if one of the terms is rare, while the
second occurs very frequently.

Based on ideas from Alexander Korotkov's fast scan patch.
parent 2013e5ee
...@@ -67,29 +67,6 @@ callConsistentFn(GinState *ginstate, GinScanKey key) ...@@ -67,29 +67,6 @@ callConsistentFn(GinState *ginstate, GinScanKey key)
PointerGetDatum(key->queryCategories))); PointerGetDatum(key->queryCategories)));
} }
/*
* Tries to refind previously taken ItemPointer on a posting page.
*/
static bool
needToStepRight(Page page, ItemPointer item)
{
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
/* page was deleted by concurrent vacuum */
return true;
if (ginCompareItemPointers(item, GinDataPageGetRightBound(page)) > 0
&& !GinPageRightMost(page))
{
/*
* the item we're looking is > the right bound of the page, so it
* can't be on this page.
*/
return true;
}
return false;
}
/* /*
* Goes to the next page if current offset is outside of bounds * Goes to the next page if current offset is outside of bounds
*/ */
...@@ -447,8 +424,7 @@ restartScanEntry: ...@@ -447,8 +424,7 @@ restartScanEntry:
page = BufferGetPage(entry->buffer); page = BufferGetPage(entry->buffer);
/* /*
* Copy page content to memory to avoid keeping it locked for * Load the first page into memory.
* a long time.
*/ */
entry->list = GinDataLeafPageGetItems(page, &entry->nlist); entry->list = GinDataLeafPageGetItems(page, &entry->nlist);
...@@ -518,88 +494,78 @@ startScan(IndexScanDesc scan) ...@@ -518,88 +494,78 @@ startScan(IndexScanDesc scan)
} }
/* /*
* Gets next ItemPointer from PostingTree. Note, that we copy * Load the next batch of item pointers from a posting tree.
* page into GinScanEntry->list array and unlock page, but keep it pinned *
* to prevent interference with vacuum * Note that we copy the page into GinScanEntry->list array and unlock it, but
* keep it pinned to prevent interference with vacuum.
*/ */
static void static void
entryGetNextItem(GinState *ginstate, GinScanEntry entry) entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advancePast)
{ {
Page page; Page page;
int i; int i;
LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
for (;;) for (;;)
{ {
if (entry->offset < entry->nlist) entry->offset = InvalidOffsetNumber;
if (entry->list)
{ {
entry->curItem = entry->list[entry->offset++]; pfree(entry->list);
return; entry->list = NULL;
entry->nlist = 0;
} }
LockBuffer(entry->buffer, GIN_SHARE); /*
page = BufferGetPage(entry->buffer); * We've processed all the entries on this page. If it was the last
for (;;) * page in the tree, we're done.
*/
if (GinPageRightMost(page))
{ {
/* UnlockReleaseBuffer(entry->buffer);
* It's needed to go by right link. During that we should refind entry->buffer = InvalidBuffer;
* first ItemPointer greater that stored entry->isFinished = TRUE;
*/ return;
if (GinPageRightMost(page)) }
{
UnlockReleaseBuffer(entry->buffer);
ItemPointerSetInvalid(&entry->curItem);
entry->buffer = InvalidBuffer;
entry->isFinished = TRUE;
return;
}
entry->buffer = ginStepRight(entry->buffer, if (GinPageGetOpaque(page)->flags & GIN_DELETED)
ginstate->index, continue; /* page was deleted by concurrent vacuum */
GIN_SHARE);
page = BufferGetPage(entry->buffer);
entry->offset = InvalidOffsetNumber; /*
if (entry->list) * Step to next page, following the right link. then find the first
{ * ItemPointer greater than advancePast.
pfree(entry->list); */
entry->list = NULL; entry->buffer = ginStepRight(entry->buffer,
} ginstate->index,
GIN_SHARE);
page = BufferGetPage(entry->buffer);
/*
* The first item > advancePast might not be on this page, but
* somewhere to the right, if the page was split, or a non-match from
* another key in the query allowed us to skip some items from this
* entry. Keep following the right-links until we re-find the correct
* page.
*/
if (!GinPageRightMost(page) &&
ginCompareItemPointers(&advancePast, GinDataPageGetRightBound(page)) >= 0)
{
/* /*
* If the page was concurrently split, we have to re-find the * the item we're looking is > the right bound of the page, so it
* item we were stopped on. If the page was split more than once, * can't be on this page.
* the item might not be on this page, but somewhere to the right.
* Keep following the right-links until we re-find the correct
* page.
*/ */
if (ItemPointerIsValid(&entry->curItem) && continue;
needToStepRight(page, &entry->curItem)) }
{
continue;
}
entry->list = GinDataLeafPageGetItems(page, &entry->nlist); entry->list = GinDataLeafPageGetItems(page, &entry->nlist);
/* re-find the item we were stopped on. */ for (i = 0; i < entry->nlist; i++)
if (ItemPointerIsValid(&entry->curItem)) {
{ if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
for (i = 0; i < entry->nlist; i++)
{
if (ginCompareItemPointers(&entry->curItem,
&entry->list[i]) < 0)
{
LockBuffer(entry->buffer, GIN_UNLOCK);
entry->offset = i + 1;
entry->curItem = entry->list[entry->offset - 1];
return;
}
}
}
else
{ {
LockBuffer(entry->buffer, GIN_UNLOCK); LockBuffer(entry->buffer, GIN_UNLOCK);
entry->offset = 1; /* scan all items on the page. */ entry->offset = i;
entry->curItem = entry->list[entry->offset - 1];
return; return;
} }
} }
...@@ -610,10 +576,10 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry) ...@@ -610,10 +576,10 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry)
#define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) ) #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) )
/* /*
* Sets entry->curItem to next heap item pointer for one entry of one scan key, * Sets entry->curItem to next heap item pointer > advancePast, for one entry
* or sets entry->isFinished to TRUE if there are no more. * of one scan key, or sets entry->isFinished to TRUE if there are no more.
* *
* Item pointers must be returned in ascending order. * Item pointers are returned in ascending order.
* *
* Note: this can return a "lossy page" item pointer, indicating that the * Note: this can return a "lossy page" item pointer, indicating that the
* entry potentially matches all items on that heap page. However, it is * entry potentially matches all items on that heap page. However, it is
...@@ -623,12 +589,20 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry) ...@@ -623,12 +589,20 @@ entryGetNextItem(GinState *ginstate, GinScanEntry entry)
* current implementation this is guaranteed by the behavior of tidbitmaps. * current implementation this is guaranteed by the behavior of tidbitmaps.
*/ */
static void static void
entryGetItem(GinState *ginstate, GinScanEntry entry) entryGetItem(GinState *ginstate, GinScanEntry entry,
ItemPointerData advancePast)
{ {
Assert(!entry->isFinished); Assert(!entry->isFinished);
Assert(!ItemPointerIsValid(&entry->curItem) ||
ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
if (entry->matchBitmap) if (entry->matchBitmap)
{ {
/* A bitmap result */
BlockNumber advancePastBlk = GinItemPointerGetBlockNumber(&advancePast);
OffsetNumber advancePastOff = GinItemPointerGetOffsetNumber(&advancePast);
do do
{ {
if (entry->matchResult == NULL || if (entry->matchResult == NULL ||
...@@ -645,6 +619,18 @@ entryGetItem(GinState *ginstate, GinScanEntry entry) ...@@ -645,6 +619,18 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
break; break;
} }
/*
* If all the matches on this page are <= advancePast, skip
* to next page.
*/
if (entry->matchResult->blockno < advancePastBlk ||
(entry->matchResult->blockno == advancePastBlk &&
entry->matchResult->offsets[entry->offset] <= advancePastOff))
{
entry->offset = entry->matchResult->ntuples;
continue;
}
/* /*
* Reset counter to the beginning of entry->matchResult. Note: * Reset counter to the beginning of entry->matchResult. Note:
* entry->offset is still greater than matchResult->ntuples if * entry->offset is still greater than matchResult->ntuples if
...@@ -670,6 +656,17 @@ entryGetItem(GinState *ginstate, GinScanEntry entry) ...@@ -670,6 +656,17 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
break; break;
} }
if (entry->matchResult->blockno == advancePastBlk)
{
/*
* Skip to the right offset on this page. We already checked
* in above loop that there is at least one item > advancePast
* on the page.
*/
while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
entry->offset++;
}
ItemPointerSet(&entry->curItem, ItemPointerSet(&entry->curItem,
entry->matchResult->blockno, entry->matchResult->blockno,
entry->matchResult->offsets[entry->offset]); entry->matchResult->offsets[entry->offset]);
...@@ -678,29 +675,48 @@ entryGetItem(GinState *ginstate, GinScanEntry entry) ...@@ -678,29 +675,48 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
} }
else if (!BufferIsValid(entry->buffer)) else if (!BufferIsValid(entry->buffer))
{ {
entry->offset++; /* A posting list from an entry tuple */
if (entry->offset <= entry->nlist) do
entry->curItem = entry->list[entry->offset - 1];
else
{ {
ItemPointerSetInvalid(&entry->curItem); if (entry->offset >= entry->nlist)
entry->isFinished = TRUE; {
} ItemPointerSetInvalid(&entry->curItem);
entry->isFinished = TRUE;
break;
}
entry->curItem = entry->list[entry->offset++];
} while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0);
/* XXX: shouldn't we apply the fuzzy search limit here? */
} }
else else
{ {
/* A posting tree */
do do
{ {
entryGetNextItem(ginstate, entry); /* If we've processed the current batch, load more items */
} while (entry->isFinished == FALSE && while (entry->offset >= entry->nlist)
entry->reduceResult == TRUE && {
dropItem(entry)); entryLoadMoreItems(ginstate, entry, advancePast);
if (entry->isFinished)
{
ItemPointerSetInvalid(&entry->curItem);
return;
}
}
entry->curItem = entry->list[entry->offset++];
} while (ginCompareItemPointers(&entry->curItem, &advancePast) <= 0 ||
(entry->reduceResult == TRUE && dropItem(entry)));
} }
} }
/* /*
* Identify the "current" item among the input entry streams for this scan key, * Identify the "current" item among the input entry streams for this scan key
* and test whether it passes the scan key qual condition. * that is greater than advancePast, and test whether it passes the scan key
* qual condition.
* *
* The current item is the smallest curItem among the inputs. key->curItem * The current item is the smallest curItem among the inputs. key->curItem
* is set to that value. key->curItemMatches is set to indicate whether that * is set to that value. key->curItemMatches is set to indicate whether that
...@@ -719,7 +735,8 @@ entryGetItem(GinState *ginstate, GinScanEntry entry) ...@@ -719,7 +735,8 @@ entryGetItem(GinState *ginstate, GinScanEntry entry)
* logic in scanGetItem.) * logic in scanGetItem.)
*/ */
static void static void
keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key) keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key,
ItemPointerData advancePast)
{ {
ItemPointerData minItem; ItemPointerData minItem;
ItemPointerData curPageLossy; ItemPointerData curPageLossy;
...@@ -729,11 +746,20 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key) ...@@ -729,11 +746,20 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
GinScanEntry entry; GinScanEntry entry;
bool res; bool res;
MemoryContext oldCtx; MemoryContext oldCtx;
bool allFinished;
Assert(!key->isFinished); Assert(!key->isFinished);
/* /*
* Find the minimum of the active entry curItems. * We might have already tested this item; if so, no need to repeat work.
* (Note: the ">" case can happen, if minItem is exact but we previously
* had to set curItem to a lossy-page pointer.)
*/
if (ginCompareItemPointers(&key->curItem, &advancePast) > 0)
return;
/*
* Find the minimum item > advancePast among the active entry streams.
* *
* Note: a lossy-page entry is encoded by a ItemPointer with max value for * Note: a lossy-page entry is encoded by a ItemPointer with max value for
* offset (0xffff), so that it will sort after any exact entries for the * offset (0xffff), so that it will sort after any exact entries for the
...@@ -741,16 +767,33 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key) ...@@ -741,16 +767,33 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
* pointers, which is good. * pointers, which is good.
*/ */
ItemPointerSetMax(&minItem); ItemPointerSetMax(&minItem);
allFinished = true;
for (i = 0; i < key->nentries; i++) for (i = 0; i < key->nentries; i++)
{ {
entry = key->scanEntry[i]; entry = key->scanEntry[i];
if (entry->isFinished == FALSE &&
ginCompareItemPointers(&entry->curItem, &minItem) < 0) /*
minItem = entry->curItem; * Advance this stream if necessary.
*
* In particular, since entry->curItem was initialized with
* ItemPointerSetMin, this ensures we fetch the first item for each
* entry on the first call.
*/
while (entry->isFinished == FALSE &&
ginCompareItemPointers(&entry->curItem, &advancePast) <= 0)
{
entryGetItem(ginstate, entry, advancePast);
}
if (!entry->isFinished)
{
allFinished = FALSE;
if (ginCompareItemPointers(&entry->curItem, &minItem) < 0)
minItem = entry->curItem;
}
} }
if (ItemPointerIsMax(&minItem)) if (allFinished)
{ {
/* all entries are finished */ /* all entries are finished */
key->isFinished = TRUE; key->isFinished = TRUE;
...@@ -758,15 +801,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key) ...@@ -758,15 +801,7 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
} }
/* /*
* We might have already tested this item; if so, no need to repeat work. * OK, set key->curItem and perform consistentFn test.
* (Note: the ">" case can happen, if minItem is exact but we previously
* had to set curItem to a lossy-page pointer.)
*/
if (ginCompareItemPointers(&key->curItem, &minItem) >= 0)
return;
/*
* OK, advance key->curItem and perform consistentFn test.
*/ */
key->curItem = minItem; key->curItem = minItem;
...@@ -895,117 +930,122 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key) ...@@ -895,117 +930,122 @@ keyGetItem(GinState *ginstate, MemoryContext tempCtx, GinScanKey key)
* keyGetItem() the combination logic is known only to the consistentFn. * keyGetItem() the combination logic is known only to the consistentFn.
*/ */
static bool static bool
scanGetItem(IndexScanDesc scan, ItemPointer advancePast, scanGetItem(IndexScanDesc scan, ItemPointerData advancePast,
ItemPointerData *item, bool *recheck) ItemPointerData *item, bool *recheck)
{ {
GinScanOpaque so = (GinScanOpaque) scan->opaque; GinScanOpaque so = (GinScanOpaque) scan->opaque;
GinState *ginstate = &so->ginstate;
ItemPointerData myAdvancePast = *advancePast;
uint32 i; uint32 i;
bool allFinished;
bool match; bool match;
for (;;) /*----------
* Advance the scan keys in lock-step, until we find an item that matches
* all the keys. If any key reports isFinished, meaning its subset of the
* entries is exhausted, we can stop. Otherwise, set *item to the next
* matching item.
*
* This logic works only if a keyGetItem stream can never contain both
* exact and lossy pointers for the same page. Else we could have a
* case like
*
* stream 1 stream 2
* ... ...
* 42/6 42/7
* 50/1 42/0xffff
* ... ...
*
* We would conclude that 42/6 is not a match and advance stream 1,
* thus never detecting the match to the lossy pointer in stream 2.
* (keyGetItem has a similar problem versus entryGetItem.)
*----------
*/
do
{ {
/* ItemPointerSetMin(item);
* Advance any entries that are <= myAdvancePast. In particular, match = true;
* since entry->curItem was initialized with ItemPointerSetMin, this for (i = 0; i < so->nkeys && match; i++)
* ensures we fetch the first item for each entry on the first call.
*/
allFinished = TRUE;
for (i = 0; i < so->totalentries; i++)
{
GinScanEntry entry = so->entries[i];
while (entry->isFinished == FALSE &&
ginCompareItemPointers(&entry->curItem,
&myAdvancePast) <= 0)
entryGetItem(ginstate, entry);
if (entry->isFinished == FALSE)
allFinished = FALSE;
}
if (allFinished)
{
/* all entries exhausted, so we're done */
return false;
}
/*
* Perform the consistentFn test for each scan key. If any key
* reports isFinished, meaning its subset of the entries is exhausted,
* we can stop. Otherwise, set *item to the minimum of the key
* curItems.
*/
ItemPointerSetMax(item);
for (i = 0; i < so->nkeys; i++)
{ {
GinScanKey key = so->keys + i; GinScanKey key = so->keys + i;
keyGetItem(&so->ginstate, so->tempCtx, key); /* Fetch the next item for this key that is > advancePast. */
keyGetItem(&so->ginstate, so->tempCtx, key, advancePast);
if (key->isFinished) if (key->isFinished)
return false; /* finished one of keys */ return false;
if (ginCompareItemPointers(&key->curItem, item) < 0)
*item = key->curItem;
}
Assert(!ItemPointerIsMax(item)); /*
* If it's not a match, we can immediately conclude that nothing
* <= this item matches, without checking the rest of the keys.
*/
if (!key->curItemMatches)
{
advancePast = key->curItem;
match = false;
break;
}
/*---------- /*
* Now *item contains first ItemPointer after previous result. * It's a match. We can conclude that nothing < matches, so
* * the other key streams can skip to this item.
* The item is a valid hit only if all the keys succeeded for either *
* that exact TID, or a lossy reference to the same page. * Beware of lossy pointers, though; from a lossy pointer, we
* * can only conclude that nothing smaller than this *block*
* This logic works only if a keyGetItem stream can never contain both * matches.
* exact and lossy pointers for the same page. Else we could have a */
* case like if (ItemPointerIsLossyPage(&key->curItem))
* {
* stream 1 stream 2 if (GinItemPointerGetBlockNumber(&advancePast) <
* ... ... GinItemPointerGetBlockNumber(&key->curItem))
* 42/6 42/7 {
* 50/1 42/0xffff advancePast.ip_blkid = key->curItem.ip_blkid;
* ... ... advancePast.ip_posid = 0;
* }
* We would conclude that 42/6 is not a match and advance stream 1, }
* thus never detecting the match to the lossy pointer in stream 2. else
* (keyGetItem has a similar problem versus entryGetItem.) {
*---------- Assert(key->curItem.ip_posid > 0);
*/ advancePast = key->curItem;
match = true; advancePast.ip_posid--;
for (i = 0; i < so->nkeys; i++) }
{
GinScanKey key = so->keys + i;
if (key->curItemMatches) /*
* If this is the first key, remember this location as a
* potential match.
*
* Otherwise, check if this is the same item that we checked the
* previous keys for (or a lossy pointer for the same page). If
* not, loop back to check the previous keys for this item (we
* will check this key again too, but keyGetItem returns quickly
* for that)
*/
if (i == 0)
{ {
if (ginCompareItemPointers(item, &key->curItem) == 0) *item = key->curItem;
continue; }
if (ItemPointerIsLossyPage(&key->curItem) && else
GinItemPointerGetBlockNumber(&key->curItem) == {
GinItemPointerGetBlockNumber(item)) if (ItemPointerIsLossyPage(&key->curItem) ||
continue; ItemPointerIsLossyPage(item))
{
Assert (GinItemPointerGetBlockNumber(&key->curItem) >= GinItemPointerGetBlockNumber(item));
match = (GinItemPointerGetBlockNumber(&key->curItem) ==
GinItemPointerGetBlockNumber(item));
}
else
{
Assert(ginCompareItemPointers(&key->curItem, item) >= 0);
match = (ginCompareItemPointers(&key->curItem, item) == 0);
}
} }
match = false;
break;
} }
} while (!match);
if (match) Assert(!ItemPointerIsMin(item));
break;
/*
* No hit. Update myAdvancePast to this TID, so that on the next pass
* we'll move to the next possible entry.
*/
myAdvancePast = *item;
}
/* /*
* Now *item contains the first ItemPointer after previous result that
* satisfied all the keys for that exact TID, or a lossy reference
* to the same page.
*
* We must return recheck = true if any of the keys are marked recheck. * We must return recheck = true if any of the keys are marked recheck.
*/ */
*recheck = false; *recheck = false;
...@@ -1536,7 +1576,7 @@ gingetbitmap(PG_FUNCTION_ARGS) ...@@ -1536,7 +1576,7 @@ gingetbitmap(PG_FUNCTION_ARGS)
{ {
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
if (!scanGetItem(scan, &iptr, &iptr, &recheck)) if (!scanGetItem(scan, iptr, &iptr, &recheck))
break; break;
if (ItemPointerIsLossyPage(&iptr)) if (ItemPointerIsLossyPage(&iptr))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment