Commit 7c70996e authored by Tom Lane's avatar Tom Lane

Allow bitmap scans to operate as index-only scans when possible.

If we don't have to return any columns from heap tuples, and there's
no need to recheck qual conditions, and the heap page is all-visible,
then we can skip fetching the heap page altogether.

Skip prefetching pages too, when possible, on the assumption that the
recheck flag will remain the same from one page to the next.  While that
assumption is hardly bulletproof, it seems like a good bet most of the
time, and better than prefetching pages we don't need.

This commit installs the executor infrastructure, but doesn't change
any planner cost estimates, thus possibly causing bitmap scans to
not be chosen in cases where this change renders them the best choice.
I (tgl) am not entirely convinced that we need to account for this
behavior in the planner, because I think typically the bitmap scan would
get chosen anyway if it's the best bet.  In any case the submitted patch
took way too many shortcuts, resulting in too many clearly-bad choices,
to be committable.

Alexander Kuzmenkov, reviewed by Alexey Chernyshov, and whacked around
rather heavily by me.

Discussion: https://postgr.es/m/239a8955-c0fc-f506-026d-c837e86c827b@postgrespro.ru
parent ec7ce542
......@@ -39,6 +39,7 @@
#include "access/relscan.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
......@@ -224,10 +225,32 @@ BitmapHeapNext(BitmapHeapScanState *node)
continue;
}
/*
* We can skip fetching the heap page if we don't need any fields
* from the heap, and the bitmap entries don't need rechecking,
* and all tuples on the page are visible to our transaction.
*/
node->skip_fetch = (node->can_skip_fetch &&
!tbmres->recheck &&
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
tbmres->blockno,
&node->vmbuffer));
if (node->skip_fetch)
{
/*
* The number of tuples on this page is put into
* scan->rs_ntuples; note we don't fill scan->rs_vistuples.
*/
scan->rs_ntuples = tbmres->ntuples;
}
else
{
/*
* Fetch the current heap page and identify candidate tuples.
*/
bitgetpage(scan, tbmres);
}
if (tbmres->ntuples >= 0)
node->exact_pages++;
......@@ -289,8 +312,17 @@ BitmapHeapNext(BitmapHeapScanState *node)
*/
BitmapPrefetch(node, scan);
if (node->skip_fetch)
{
/*
* Okay to fetch the tuple
* If we don't have to fetch the tuple, just return nulls.
*/
ExecStoreAllNullTuple(slot);
}
else
{
/*
* Okay to fetch the tuple.
*/
targoffset = scan->rs_vistuples[scan->rs_cindex];
dp = (Page) BufferGetPage(scan->rs_cbuf);
......@@ -305,8 +337,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
pgstat_count_heap_fetch(scan->rs_rd);
/*
* Set up the result slot to point to this tuple. Note that the slot
* acquires a pin on the buffer.
* Set up the result slot to point to this tuple. Note that the
* slot acquires a pin on the buffer.
*/
ExecStoreTuple(&scan->rs_ctup,
slot,
......@@ -314,8 +346,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
false);
/*
* If we are using lossy info, we have to recheck the qual conditions
* at every tuple.
* If we are using lossy info, we have to recheck the qual
* conditions at every tuple.
*/
if (tbmres->recheck)
{
......@@ -330,6 +362,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
continue;
}
}
}
/* OK to return this tuple */
return slot;
......@@ -582,6 +615,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
while (node->prefetch_pages < node->prefetch_target)
{
TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
bool skip_fetch;
if (tbmpre == NULL)
{
......@@ -591,6 +625,25 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
break;
}
node->prefetch_pages++;
/*
* If we expect not to have to actually read this heap page,
* skip this prefetch call, but continue to run the prefetch
* logic normally. (Would it be better not to increment
* prefetch_pages?)
*
* This depends on the assumption that the index AM will
* report the same recheck flag for this future heap page as
* it did for the current heap page; which is not a certainty
* but is true in many cases.
*/
skip_fetch = (node->can_skip_fetch &&
(node->tbmres ? !node->tbmres->recheck : false) &&
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
tbmpre->blockno,
&node->pvmbuffer));
if (!skip_fetch)
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
}
}
......@@ -608,6 +661,7 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
{
TBMIterateResult *tbmpre;
bool do_prefetch = false;
bool skip_fetch;
/*
* Recheck under the mutex. If some other process has already
......@@ -633,6 +687,14 @@ BitmapPrefetch(BitmapHeapScanState *node, HeapScanDesc scan)
break;
}
/* As above, skip prefetch if we expect not to need page */
skip_fetch = (node->can_skip_fetch &&
(node->tbmres ? !node->tbmres->recheck : false) &&
VM_ALL_VISIBLE(node->ss.ss_currentRelation,
tbmpre->blockno,
&node->pvmbuffer));
if (!skip_fetch)
PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
}
}
......@@ -687,6 +749,7 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
/* rescan to release any page pin */
heap_rescan(node->ss.ss_currentScanDesc, NULL);
/* release bitmaps and buffers if any */
if (node->tbmiterator)
tbm_end_iterate(node->tbmiterator);
if (node->prefetch_iterator)
......@@ -697,6 +760,10 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
if (node->tbm)
tbm_free(node->tbm);
if (node->vmbuffer != InvalidBuffer)
ReleaseBuffer(node->vmbuffer);
if (node->pvmbuffer != InvalidBuffer)
ReleaseBuffer(node->pvmbuffer);
node->tbm = NULL;
node->tbmiterator = NULL;
node->tbmres = NULL;
......@@ -704,6 +771,8 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
node->initialized = false;
node->shared_tbmiterator = NULL;
node->shared_prefetch_iterator = NULL;
node->vmbuffer = InvalidBuffer;
node->pvmbuffer = InvalidBuffer;
ExecScanReScan(&node->ss);
......@@ -748,7 +817,7 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
ExecEndNode(outerPlanState(node));
/*
* release bitmap if any
* release bitmaps and buffers if any
*/
if (node->tbmiterator)
tbm_end_iterate(node->tbmiterator);
......@@ -760,6 +829,10 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
tbm_end_shared_iterate(node->shared_tbmiterator);
if (node->shared_prefetch_iterator)
tbm_end_shared_iterate(node->shared_prefetch_iterator);
if (node->vmbuffer != InvalidBuffer)
ReleaseBuffer(node->vmbuffer);
if (node->pvmbuffer != InvalidBuffer)
ReleaseBuffer(node->pvmbuffer);
/*
* close heap scan
......@@ -805,6 +878,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->tbm = NULL;
scanstate->tbmiterator = NULL;
scanstate->tbmres = NULL;
scanstate->skip_fetch = false;
scanstate->vmbuffer = InvalidBuffer;
scanstate->pvmbuffer = InvalidBuffer;
scanstate->exact_pages = 0;
scanstate->lossy_pages = 0;
scanstate->prefetch_iterator = NULL;
......@@ -815,8 +891,19 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->pscan_len = 0;
scanstate->initialized = false;
scanstate->shared_tbmiterator = NULL;
scanstate->shared_prefetch_iterator = NULL;
scanstate->pstate = NULL;
/*
* We can potentially skip fetching heap pages if we do not need any
* columns of the table, either for checking non-indexable quals or for
* returning data. This test is a bit simplistic, as it checks the
* stronger condition that there's no qual or return tlist at all. But in
* most cases it's probably not worth working harder than that.
*/
scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
node->scan.plan.targetlist == NIL);
/*
* Miscellaneous initialization
*
......
......@@ -807,6 +807,15 @@ use_physical_tlist(PlannerInfo *root, Path *path, int flags)
if (IsA(path, CustomPath))
return false;
/*
* If a bitmap scan's tlist is empty, keep it as-is. This may allow the
* executor to skip heap page fetches, and in any case, the benefit of
* using a physical tlist instead would be minimal.
*/
if (IsA(path, BitmapHeapPath) &&
path->pathtarget->exprs == NIL)
return false;
/*
* Can't do it if any system columns or whole-row Vars are requested.
* (This could possibly be fixed but would take some fragile assumptions
......
......@@ -1331,6 +1331,10 @@ typedef struct ParallelBitmapHeapState
* tbm bitmap obtained from child index scan(s)
* tbmiterator iterator for scanning current pages
* tbmres current-page data
* can_skip_fetch can we potentially skip tuple fetches in this scan?
* skip_fetch are we skipping tuple fetches on this page?
* vmbuffer buffer for visibility-map lookups
* pvmbuffer ditto, for prefetched pages
* exact_pages total number of exact pages retrieved
* lossy_pages total number of lossy pages retrieved
* prefetch_iterator iterator for prefetching ahead of current page
......@@ -1351,6 +1355,10 @@ typedef struct BitmapHeapScanState
TIDBitmap *tbm;
TBMIterator *tbmiterator;
TBMIterateResult *tbmres;
bool can_skip_fetch;
bool skip_fetch;
Buffer vmbuffer;
Buffer pvmbuffer;
long exact_pages;
long lossy_pages;
TBMIterator *prefetch_iterator;
......
......@@ -136,12 +136,15 @@ SELECT count(*) FROM tenk2;
(1 row)
-- do an indexscan
-- make sure it is not a bitmap scan, which might skip fetching heap tuples
SET enable_bitmapscan TO off;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
count
-------
1
(1 row)
RESET enable_bitmapscan;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
-- transmit the last couple of counts yet thanks to the rate-limiting logic
......
......@@ -138,7 +138,10 @@ ROLLBACK;
-- do a seqscan
SELECT count(*) FROM tenk2;
-- do an indexscan
-- make sure it is not a bitmap scan, which might skip fetching heap tuples
SET enable_bitmapscan TO off;
SELECT count(*) FROM tenk2 WHERE unique1 = 1;
RESET enable_bitmapscan;
-- We can't just call wait_for_stats() at this point, because we only
-- transmit stats when the session goes idle, and we probably didn't
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment