Commit e6858e66, authored by Tom Lane

Measure the number of all-visible pages for use in index-only scan costing.

Add a column pg_class.relallvisible to remember the number of pages that
were all-visible according to the visibility map as of the last VACUUM
(or ANALYZE, or some other operations that update pg_class.relpages).
Use relallvisible/relpages, instead of an arbitrary constant, to estimate
how many heap page fetches can be avoided during an index-only scan.

This is pretty primitive and will no doubt see refinements once we've
acquired more field experience with the index-only scan mechanism, but
it's way better than using a constant.

Note: I had to adjust an underspecified query in the window.sql regression
test, because it was changing answers when the plan changed to use an
index-only scan.  Some of the adjacent tests perhaps should be adjusted
as well, but I didn't do that here.
parent dea95c7a
...@@ -1654,6 +1654,19 @@ ...@@ -1654,6 +1654,19 @@
</entry> </entry>
</row> </row>
<row>
<entry><structfield>relallvisible</structfield></entry>
<entry><type>int4</type></entry>
<entry></entry>
<entry>
Number of pages that are marked all-visible in the table's
visibility map. This is only an estimate used by the
planner. It is updated by <command>VACUUM</command>,
<command>ANALYZE</command>, and a few DDL commands such as
<command>CREATE INDEX</command>.
</entry>
</row>
<row> <row>
<entry><structfield>reltoastrelid</structfield></entry> <entry><structfield>reltoastrelid</structfield></entry>
<entry><type>oid</type></entry> <entry><type>oid</type></entry>
......
...@@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS) ...@@ -55,6 +55,7 @@ hashbuild(PG_FUNCTION_ARGS)
IndexBuildResult *result; IndexBuildResult *result;
BlockNumber relpages; BlockNumber relpages;
double reltuples; double reltuples;
double allvisfrac;
uint32 num_buckets; uint32 num_buckets;
HashBuildState buildstate; HashBuildState buildstate;
...@@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS) ...@@ -67,7 +68,7 @@ hashbuild(PG_FUNCTION_ARGS)
RelationGetRelationName(index)); RelationGetRelationName(index));
/* Estimate the number of rows currently present in the table */ /* Estimate the number of rows currently present in the table */
estimate_rel_size(heap, NULL, &relpages, &reltuples); estimate_rel_size(heap, NULL, &relpages, &reltuples, &allvisfrac);
/* Initialize the hash index metadata page and initial buckets */ /* Initialize the hash index metadata page and initial buckets */
num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM); num_buckets = _hash_metapinit(index, reltuples, MAIN_FORKNUM);
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
* visibilitymap_pin_ok - check whether correct map page is already pinned * visibilitymap_pin_ok - check whether correct map page is already pinned
* visibilitymap_set - set a bit in a previously pinned page * visibilitymap_set - set a bit in a previously pinned page
* visibilitymap_test - test if a bit is set * visibilitymap_test - test if a bit is set
* visibilitymap_count - count number of bits set in visibility map
* visibilitymap_truncate - truncate the visibility map
* *
* NOTES * NOTES
* *
...@@ -110,6 +112,26 @@ ...@@ -110,6 +112,26 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
/*
 * number_of_ones[b] is the number of 1-bits in the byte value b (0..255).
 *
 * The table is generated with helper macros rather than spelled out:
 * NOO_2(n) emits the four counts for the two low-order bits on top of a
 * base count n, and NOO_4 / NOO_6 apply the same 0,+1,+1,+2 pattern for
 * each further pair of bits.  The four NOO_6 invocations below cover the
 * two high-order bits, yielding all 256 entries in index order.
 */
#define NOO_2(n)	(n), (n) + 1, (n) + 1, (n) + 2
#define NOO_4(n)	NOO_2(n), NOO_2((n) + 1), NOO_2((n) + 1), NOO_2((n) + 2)
#define NOO_6(n)	NOO_4(n), NOO_4((n) + 1), NOO_4((n) + 1), NOO_4((n) + 2)

static const uint8 number_of_ones[256] = {
	NOO_6(0), NOO_6(1), NOO_6(1), NOO_6(2)
};

/* the generator macros are only needed for the initializer above */
#undef NOO_6
#undef NOO_4
#undef NOO_2
/* prototypes for internal routines */ /* prototypes for internal routines */
static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend); static Buffer vm_readbuf(Relation rel, BlockNumber blkno, bool extend);
static void vm_extend(Relation rel, BlockNumber nvmblocks); static void vm_extend(Relation rel, BlockNumber nvmblocks);
...@@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf) ...@@ -307,6 +329,52 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
return result; return result;
} }
/*
 *	visibilitymap_count - count number of bits set in visibility map
 *
 * Walks the map one page at a time, starting at map block 0, and sums the
 * set bits found in each page's contents.  The total is returned as a
 * BlockNumber.
 *
 * Note: races against concurrent extension of the table are deliberately
 * ignored.  Pages newly added to the table aren't going to be marked
 * all-visible, so they would not change the answer.
 */
BlockNumber
visibilitymap_count(Relation rel)
{
	BlockNumber nallvisible = 0;
	BlockNumber blkno = 0;

	for (;;)
	{
		Buffer		vmbuf;
		unsigned char *cur;
		unsigned char *stop;

		/*
		 * Keep reading map pages until the read comes back with an invalid
		 * buffer, i.e. we have fallen off the end of the map.  Any extra
		 * bytes in the last page are assumed to be zeroed, so no effort is
		 * made to exclude them from the count.
		 */
		vmbuf = vm_readbuf(rel, blkno, false);
		if (!BufferIsValid(vmbuf))
			break;

		/*
		 * The page is intentionally not locked: if anyone is concurrently
		 * setting or clearing bits the result is stale immediately anyway,
		 * and only an approximate value is really needed.
		 */
		cur = (unsigned char *) PageGetContents(BufferGetPage(vmbuf));
		stop = cur + MAPSIZE;

		/* add up the per-byte bit counts across the whole map area */
		while (cur < stop)
		{
			nallvisible += number_of_ones[*cur];
			cur++;
		}

		ReleaseBuffer(vmbuf);
		blkno++;
	}

	return nallvisible;
}
/* /*
* visibilitymap_truncate - truncate the visibility map * visibilitymap_truncate - truncate the visibility map
* *
......
...@@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc, ...@@ -772,6 +772,7 @@ InsertPgClassTuple(Relation pg_class_desc,
values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace);
values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible);
values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid); values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid);
values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid); values[Anum_pg_class_reltoastidxid - 1] = ObjectIdGetDatum(rd_rel->reltoastidxid);
values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex); values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex);
...@@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc, ...@@ -845,16 +846,19 @@ AddNewRelationTuple(Relation pg_class_desc,
/* The relation is real, but as yet empty */ /* The relation is real, but as yet empty */
new_rel_reltup->relpages = 0; new_rel_reltup->relpages = 0;
new_rel_reltup->reltuples = 0; new_rel_reltup->reltuples = 0;
new_rel_reltup->relallvisible = 0;
break; break;
case RELKIND_SEQUENCE: case RELKIND_SEQUENCE:
/* Sequences always have a known size */ /* Sequences always have a known size */
new_rel_reltup->relpages = 1; new_rel_reltup->relpages = 1;
new_rel_reltup->reltuples = 1; new_rel_reltup->reltuples = 1;
new_rel_reltup->relallvisible = 0;
break; break;
default: default:
/* Views, etc, have no disk storage */ /* Views, etc, have no disk storage */
new_rel_reltup->relpages = 0; new_rel_reltup->relpages = 0;
new_rel_reltup->reltuples = 0; new_rel_reltup->reltuples = 0;
new_rel_reltup->relallvisible = 0;
break; break;
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "access/relscan.h" #include "access/relscan.h"
#include "access/sysattr.h" #include "access/sysattr.h"
#include "access/transam.h" #include "access/transam.h"
#include "access/visibilitymap.h"
#include "access/xact.h" #include "access/xact.h"
#include "bootstrap/bootstrap.h" #include "bootstrap/bootstrap.h"
#include "catalog/catalog.h" #include "catalog/catalog.h"
...@@ -1059,7 +1060,7 @@ index_create(Relation heapRelation, ...@@ -1059,7 +1060,7 @@ index_create(Relation heapRelation,
true, true,
isprimary, isprimary,
InvalidOid, InvalidOid,
heapRelation->rd_rel->reltuples); -1.0);
/* Make the above update visible */ /* Make the above update visible */
CommandCounterIncrement(); CommandCounterIncrement();
} }
...@@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation, ...@@ -1225,7 +1226,7 @@ index_constraint_create(Relation heapRelation,
true, true,
true, true,
InvalidOid, InvalidOid,
heapRelation->rd_rel->reltuples); -1.0);
/* /*
* If needed, mark the index as primary and/or deferred in pg_index. * If needed, mark the index as primary and/or deferred in pg_index.
...@@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo, ...@@ -1533,9 +1534,10 @@ FormIndexDatum(IndexInfo *indexInfo,
* isprimary: if true, set relhaspkey true; else no change * isprimary: if true, set relhaspkey true; else no change
* reltoastidxid: if not InvalidOid, set reltoastidxid to this value; * reltoastidxid: if not InvalidOid, set reltoastidxid to this value;
* else no change * else no change
* reltuples: set reltuples to this value * reltuples: if >= 0, set reltuples to this value; else no change
* *
* relpages is also updated (using RelationGetNumberOfBlocks()). * If reltuples >= 0, relpages and relallvisible are also updated (using
* RelationGetNumberOfBlocks() and visibilitymap_count()).
* *
* NOTE: an important side-effect of this operation is that an SI invalidation * NOTE: an important side-effect of this operation is that an SI invalidation
* message is sent out to all backends --- including me --- causing relcache * message is sent out to all backends --- including me --- causing relcache
...@@ -1550,7 +1552,6 @@ index_update_stats(Relation rel, ...@@ -1550,7 +1552,6 @@ index_update_stats(Relation rel,
bool hasindex, bool isprimary, bool hasindex, bool isprimary,
Oid reltoastidxid, double reltuples) Oid reltoastidxid, double reltuples)
{ {
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
Oid relid = RelationGetRelid(rel); Oid relid = RelationGetRelid(rel);
Relation pg_class; Relation pg_class;
HeapTuple tuple; HeapTuple tuple;
...@@ -1586,9 +1587,11 @@ index_update_stats(Relation rel, ...@@ -1586,9 +1587,11 @@ index_update_stats(Relation rel,
* It is safe to use a non-transactional update even though our * It is safe to use a non-transactional update even though our
* transaction could still fail before committing. Setting relhasindex * transaction could still fail before committing. Setting relhasindex
* true is safe even if there are no indexes (VACUUM will eventually fix * true is safe even if there are no indexes (VACUUM will eventually fix
* it), likewise for relhaspkey. And of course the relpages and reltuples * it), likewise for relhaspkey. And of course the new relpages and
* counts are correct (or at least more so than the old values) * reltuples counts are correct regardless. However, we don't want to
* regardless. * change relpages (or relallvisible) if the caller isn't providing an
* updated reltuples count, because that would bollix the
* reltuples/relpages ratio which is what's really important.
*/ */
pg_class = heap_open(RelationRelationId, RowExclusiveLock); pg_class = heap_open(RelationRelationId, RowExclusiveLock);
...@@ -1650,16 +1653,33 @@ index_update_stats(Relation rel, ...@@ -1650,16 +1653,33 @@ index_update_stats(Relation rel,
dirty = true; dirty = true;
} }
} }
if (reltuples >= 0)
{
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
BlockNumber relallvisible;
if (rd_rel->relkind != RELKIND_INDEX)
relallvisible = visibilitymap_count(rel);
else /* don't bother for indexes */
relallvisible = 0;
if (rd_rel->relpages != (int32) relpages)
{
rd_rel->relpages = (int32) relpages;
dirty = true;
}
if (rd_rel->reltuples != (float4) reltuples) if (rd_rel->reltuples != (float4) reltuples)
{ {
rd_rel->reltuples = (float4) reltuples; rd_rel->reltuples = (float4) reltuples;
dirty = true; dirty = true;
} }
if (rd_rel->relpages != (int32) relpages) if (rd_rel->relallvisible != (int32) relallvisible)
{ {
rd_rel->relpages = (int32) relpages; rd_rel->relallvisible = (int32) relallvisible;
dirty = true; dirty = true;
} }
}
/* /*
* If anything changed, write out the tuple * If anything changed, write out the tuple
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "access/transam.h" #include "access/transam.h"
#include "access/tupconvert.h" #include "access/tupconvert.h"
#include "access/tuptoaster.h" #include "access/tuptoaster.h"
#include "access/visibilitymap.h"
#include "access/xact.h" #include "access/xact.h"
#include "catalog/index.h" #include "catalog/index.h"
#include "catalog/indexing.h" #include "catalog/indexing.h"
...@@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) ...@@ -534,7 +535,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
if (!inh) if (!inh)
vac_update_relstats(onerel, vac_update_relstats(onerel,
RelationGetNumberOfBlocks(onerel), RelationGetNumberOfBlocks(onerel),
totalrows, hasindex, InvalidTransactionId); totalrows,
visibilitymap_count(onerel),
hasindex,
InvalidTransactionId);
/* /*
* Same for indexes. Vacuum always scans all indexes, so if we're part of * Same for indexes. Vacuum always scans all indexes, so if we're part of
...@@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh) ...@@ -551,7 +555,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
totalindexrows = ceil(thisdata->tupleFract * totalrows); totalindexrows = ceil(thisdata->tupleFract * totalrows);
vac_update_relstats(Irel[ind], vac_update_relstats(Irel[ind],
RelationGetNumberOfBlocks(Irel[ind]), RelationGetNumberOfBlocks(Irel[ind]),
totalindexrows, false, InvalidTransactionId); totalindexrows,
0,
false,
InvalidTransactionId);
} }
} }
......
...@@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, ...@@ -1205,6 +1205,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
{ {
int4 swap_pages; int4 swap_pages;
float4 swap_tuples; float4 swap_tuples;
int4 swap_allvisible;
swap_pages = relform1->relpages; swap_pages = relform1->relpages;
relform1->relpages = relform2->relpages; relform1->relpages = relform2->relpages;
...@@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, ...@@ -1213,6 +1214,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
swap_tuples = relform1->reltuples; swap_tuples = relform1->reltuples;
relform1->reltuples = relform2->reltuples; relform1->reltuples = relform2->reltuples;
relform2->reltuples = swap_tuples; relform2->reltuples = swap_tuples;
swap_allvisible = relform1->relallvisible;
relform1->relallvisible = relform2->relallvisible;
relform2->relallvisible = swap_allvisible;
} }
/* /*
......
...@@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze, ...@@ -569,6 +569,7 @@ vac_estimate_reltuples(Relation relation, bool is_analyze,
void void
vac_update_relstats(Relation relation, vac_update_relstats(Relation relation,
BlockNumber num_pages, double num_tuples, BlockNumber num_pages, double num_tuples,
BlockNumber num_all_visible_pages,
bool hasindex, TransactionId frozenxid) bool hasindex, TransactionId frozenxid)
{ {
Oid relid = RelationGetRelid(relation); Oid relid = RelationGetRelid(relation);
...@@ -599,6 +600,11 @@ vac_update_relstats(Relation relation, ...@@ -599,6 +600,11 @@ vac_update_relstats(Relation relation,
pgcform->reltuples = (float4) num_tuples; pgcform->reltuples = (float4) num_tuples;
dirty = true; dirty = true;
} }
if (pgcform->relallvisible != (int32) num_all_visible_pages)
{
pgcform->relallvisible = (int32) num_all_visible_pages;
dirty = true;
}
if (pgcform->relhasindex != hasindex) if (pgcform->relhasindex != hasindex)
{ {
pgcform->relhasindex = hasindex; pgcform->relhasindex = hasindex;
......
...@@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -158,6 +158,7 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
TransactionId freezeTableLimit; TransactionId freezeTableLimit;
BlockNumber new_rel_pages; BlockNumber new_rel_pages;
double new_rel_tuples; double new_rel_tuples;
BlockNumber new_rel_allvisible;
TransactionId new_frozen_xid; TransactionId new_frozen_xid;
/* measure elapsed time iff autovacuum logging requires it */ /* measure elapsed time iff autovacuum logging requires it */
...@@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -222,6 +223,10 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
* density") with nonzero relpages and reltuples=0 (which means "zero * density") with nonzero relpages and reltuples=0 (which means "zero
* tuple density") unless there's some actual evidence for the latter. * tuple density") unless there's some actual evidence for the latter.
* *
* We do update relallvisible even in the corner case, since if the
* table is all-visible we'd definitely like to know that. But clamp
* the value to be not more than what we're setting relpages to.
*
* Also, don't change relfrozenxid if we skipped any pages, since then * Also, don't change relfrozenxid if we skipped any pages, since then
* we don't know for certain that all tuples have a newer xmin. * we don't know for certain that all tuples have a newer xmin.
*/ */
...@@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -233,12 +238,18 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
new_rel_tuples = vacrelstats->old_rel_tuples; new_rel_tuples = vacrelstats->old_rel_tuples;
} }
new_rel_allvisible = visibilitymap_count(onerel);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
new_frozen_xid = FreezeLimit; new_frozen_xid = FreezeLimit;
if (vacrelstats->scanned_pages < vacrelstats->rel_pages) if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
new_frozen_xid = InvalidTransactionId; new_frozen_xid = InvalidTransactionId;
vac_update_relstats(onerel, vac_update_relstats(onerel,
new_rel_pages, new_rel_tuples, new_rel_pages,
new_rel_tuples,
new_rel_allvisible,
vacrelstats->hasindex, vacrelstats->hasindex,
new_frozen_xid); new_frozen_xid);
...@@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel, ...@@ -1063,8 +1074,11 @@ lazy_cleanup_index(Relation indrel,
*/ */
if (!stats->estimated_count) if (!stats->estimated_count)
vac_update_relstats(indrel, vac_update_relstats(indrel,
stats->num_pages, stats->num_index_tuples, stats->num_pages,
false, InvalidTransactionId); stats->num_index_tuples,
0,
false,
InvalidTransactionId);
ereport(elevel, ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages", (errmsg("index \"%s\" now contains %.0f row versions in %u pages",
......
...@@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node) ...@@ -1743,6 +1743,7 @@ _outRelOptInfo(StringInfo str, RelOptInfo *node)
WRITE_NODE_FIELD(indexlist); WRITE_NODE_FIELD(indexlist);
WRITE_UINT_FIELD(pages); WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
WRITE_NODE_FIELD(subplan); WRITE_NODE_FIELD(subplan);
WRITE_NODE_FIELD(subroot); WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(baserestrictinfo); WRITE_NODE_FIELD(baserestrictinfo);
......
...@@ -120,9 +120,6 @@ bool enable_material = true; ...@@ -120,9 +120,6 @@ bool enable_material = true;
bool enable_mergejoin = true; bool enable_mergejoin = true;
bool enable_hashjoin = true; bool enable_hashjoin = true;
/* Possibly this should become a GUC too */
static double visibility_fraction = 0.9;
typedef struct typedef struct
{ {
PlannerInfo *root; PlannerInfo *root;
...@@ -324,9 +321,10 @@ cost_index(IndexPath *path, PlannerInfo *root, ...@@ -324,9 +321,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
* *
* If it's an index-only scan, then we will not need to fetch any heap * If it's an index-only scan, then we will not need to fetch any heap
* pages for which the visibility map shows all tuples are visible. * pages for which the visibility map shows all tuples are visible.
* Unfortunately, we have no stats as to how much of the heap is * Hence, reduce the estimated number of heap fetches accordingly.
* all-visible, and that's likely to be a rather unstable number anyway. * We use the measured fraction of the entire heap that is all-visible,
* We use an arbitrary constant visibility_fraction to estimate this. * which might not be particularly relevant to the subset of the heap
* that this query will fetch; but it's not clear how to do better.
*---------- *----------
*/ */
if (outer_rel != NULL && outer_rel->rows > 1) if (outer_rel != NULL && outer_rel->rows > 1)
...@@ -347,7 +345,7 @@ cost_index(IndexPath *path, PlannerInfo *root, ...@@ -347,7 +345,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
root); root);
if (indexonly) if (indexonly)
pages_fetched = ceil(pages_fetched * visibility_fraction); pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
...@@ -369,7 +367,7 @@ cost_index(IndexPath *path, PlannerInfo *root, ...@@ -369,7 +367,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
root); root);
if (indexonly) if (indexonly)
pages_fetched = ceil(pages_fetched * visibility_fraction); pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans; min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
} }
...@@ -385,7 +383,7 @@ cost_index(IndexPath *path, PlannerInfo *root, ...@@ -385,7 +383,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
root); root);
if (indexonly) if (indexonly)
pages_fetched = ceil(pages_fetched * visibility_fraction); pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
max_IO_cost = pages_fetched * spc_random_page_cost; max_IO_cost = pages_fetched * spc_random_page_cost;
...@@ -394,7 +392,7 @@ cost_index(IndexPath *path, PlannerInfo *root, ...@@ -394,7 +392,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
pages_fetched = ceil(indexSelectivity * (double) baserel->pages); pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
if (indexonly) if (indexonly)
pages_fetched = ceil(pages_fetched * visibility_fraction); pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
min_IO_cost = spc_random_page_cost; min_IO_cost = spc_random_page_cost;
if (pages_fetched > 1) if (pages_fetched > 1)
......
...@@ -116,7 +116,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ...@@ -116,7 +116,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
*/ */
if (!inhparent) if (!inhparent)
estimate_rel_size(relation, rel->attr_widths - rel->min_attr, estimate_rel_size(relation, rel->attr_widths - rel->min_attr,
&rel->pages, &rel->tuples); &rel->pages, &rel->tuples, &rel->allvisfrac);
/* /*
* Make list of indexes. Ignore indexes on system catalogs if told to. * Make list of indexes. Ignore indexes on system catalogs if told to.
...@@ -339,8 +339,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ...@@ -339,8 +339,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
} }
else else
{ {
double allvisfrac; /* dummy */
estimate_rel_size(indexRelation, NULL, estimate_rel_size(indexRelation, NULL,
&info->pages, &info->tuples); &info->pages, &info->tuples, &allvisfrac);
if (info->tuples > rel->tuples) if (info->tuples > rel->tuples)
info->tuples = rel->tuples; info->tuples = rel->tuples;
} }
...@@ -369,17 +371,21 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ...@@ -369,17 +371,21 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
/* /*
* estimate_rel_size - estimate # pages and # tuples in a table or index * estimate_rel_size - estimate # pages and # tuples in a table or index
* *
* We also estimate the fraction of the pages that are marked all-visible in
* the visibility map, for use in estimation of index-only scans.
*
* If attr_widths isn't NULL, it points to the zero-index entry of the * If attr_widths isn't NULL, it points to the zero-index entry of the
* relation's attr_widths[] cache; we fill this in if we have need to compute * relation's attr_widths[] cache; we fill this in if we have need to compute
* the attribute widths for estimation purposes. * the attribute widths for estimation purposes.
*/ */
void void
estimate_rel_size(Relation rel, int32 *attr_widths, estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples) BlockNumber *pages, double *tuples, double *allvisfrac)
{ {
BlockNumber curpages; BlockNumber curpages;
BlockNumber relpages; BlockNumber relpages;
double reltuples; double reltuples;
BlockNumber relallvisible;
double density; double density;
switch (rel->rd_rel->relkind) switch (rel->rd_rel->relkind)
...@@ -432,11 +438,13 @@ estimate_rel_size(Relation rel, int32 *attr_widths, ...@@ -432,11 +438,13 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
if (curpages == 0) if (curpages == 0)
{ {
*tuples = 0; *tuples = 0;
*allvisfrac = 0;
break; break;
} }
/* coerce values in pg_class to more desirable types */ /* coerce values in pg_class to more desirable types */
relpages = (BlockNumber) rel->rd_rel->relpages; relpages = (BlockNumber) rel->rd_rel->relpages;
reltuples = (double) rel->rd_rel->reltuples; reltuples = (double) rel->rd_rel->reltuples;
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
/* /*
* If it's an index, discount the metapage while estimating the * If it's an index, discount the metapage while estimating the
...@@ -480,21 +488,37 @@ estimate_rel_size(Relation rel, int32 *attr_widths, ...@@ -480,21 +488,37 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width; density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
} }
*tuples = rint(density * (double) curpages); *tuples = rint(density * (double) curpages);
/*
* We use relallvisible as-is, rather than scaling it up like we
* do for the pages and tuples counts, on the theory that any
* pages added since the last VACUUM are most likely not marked
* all-visible. But costsize.c wants it converted to a fraction.
*/
if (relallvisible == 0 || curpages <= 0)
*allvisfrac = 0;
else if ((double) relallvisible >= curpages)
*allvisfrac = 1;
else
*allvisfrac = (double) relallvisible / curpages;
break; break;
case RELKIND_SEQUENCE: case RELKIND_SEQUENCE:
/* Sequences always have a known size */ /* Sequences always have a known size */
*pages = 1; *pages = 1;
*tuples = 1; *tuples = 1;
*allvisfrac = 0;
break; break;
case RELKIND_FOREIGN_TABLE: case RELKIND_FOREIGN_TABLE:
/* Just use whatever's in pg_class */ /* Just use whatever's in pg_class */
*pages = rel->rd_rel->relpages; *pages = rel->rd_rel->relpages;
*tuples = rel->rd_rel->reltuples; *tuples = rel->rd_rel->reltuples;
*allvisfrac = 0;
break; break;
default: default:
/* else it has no disk storage; probably shouldn't get here? */ /* else it has no disk storage; probably shouldn't get here? */
*pages = 0; *pages = 0;
*tuples = 0; *tuples = 0;
*allvisfrac = 0;
break; break;
} }
} }
......
...@@ -109,6 +109,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind) ...@@ -109,6 +109,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptKind reloptkind)
rel->indexlist = NIL; rel->indexlist = NIL;
rel->pages = 0; rel->pages = 0;
rel->tuples = 0; rel->tuples = 0;
rel->allvisfrac = 0;
rel->subplan = NULL; rel->subplan = NULL;
rel->subroot = NULL; rel->subroot = NULL;
rel->baserestrictinfo = NIL; rel->baserestrictinfo = NIL;
...@@ -362,6 +363,7 @@ build_join_rel(PlannerInfo *root, ...@@ -362,6 +363,7 @@ build_join_rel(PlannerInfo *root,
joinrel->indexlist = NIL; joinrel->indexlist = NIL;
joinrel->pages = 0; joinrel->pages = 0;
joinrel->tuples = 0; joinrel->tuples = 0;
joinrel->allvisfrac = 0;
joinrel->subplan = NULL; joinrel->subplan = NULL;
joinrel->subroot = NULL; joinrel->subroot = NULL;
joinrel->baserestrictinfo = NIL; joinrel->baserestrictinfo = NIL;
......
...@@ -1414,6 +1414,7 @@ formrdesc(const char *relationName, Oid relationReltype, ...@@ -1414,6 +1414,7 @@ formrdesc(const char *relationName, Oid relationReltype,
relation->rd_rel->relpages = 0; relation->rd_rel->relpages = 0;
relation->rd_rel->reltuples = 0; relation->rd_rel->reltuples = 0;
relation->rd_rel->relallvisible = 0;
relation->rd_rel->relkind = RELKIND_RELATION; relation->rd_rel->relkind = RELKIND_RELATION;
relation->rd_rel->relhasoids = hasoids; relation->rd_rel->relhasoids = hasoids;
relation->rd_rel->relnatts = (int16) natts; relation->rd_rel->relnatts = (int16) natts;
...@@ -2668,6 +2669,7 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid) ...@@ -2668,6 +2669,7 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
{ {
classform->relpages = 0; /* it's empty until further notice */ classform->relpages = 0; /* it's empty until further notice */
classform->reltuples = 0; classform->reltuples = 0;
classform->relallvisible = 0;
} }
classform->relfrozenxid = freezeXid; classform->relfrozenxid = freezeXid;
......
...@@ -27,6 +27,7 @@ extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf); ...@@ -27,6 +27,7 @@ extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, extern void visibilitymap_set(Relation rel, BlockNumber heapBlk,
XLogRecPtr recptr, Buffer vmbuf); XLogRecPtr recptr, Buffer vmbuf);
extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
extern void visibilitymap_truncate(Relation rel, BlockNumber heapblk); extern BlockNumber visibilitymap_count(Relation rel);
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
#endif /* VISIBILITYMAP_H */ #endif /* VISIBILITYMAP_H */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201110071 #define CATALOG_VERSION_NO 201110141
#endif #endif
...@@ -45,6 +45,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO ...@@ -45,6 +45,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
Oid reltablespace; /* identifier of table space for relation */ Oid reltablespace; /* identifier of table space for relation */
int4 relpages; /* # of blocks (not always up-to-date) */ int4 relpages; /* # of blocks (not always up-to-date) */
float4 reltuples; /* # of tuples (not always up-to-date) */ float4 reltuples; /* # of tuples (not always up-to-date) */
int4 relallvisible; /* # of all-visible blocks (not always
* up-to-date) */
Oid reltoastrelid; /* OID of toast table; 0 if none */ Oid reltoastrelid; /* OID of toast table; 0 if none */
Oid reltoastidxid; /* if toast table, OID of chunk_id index */ Oid reltoastidxid; /* if toast table, OID of chunk_id index */
bool relhasindex; /* T if has (or has had) any indexes */ bool relhasindex; /* T if has (or has had) any indexes */
...@@ -92,7 +94,7 @@ typedef FormData_pg_class *Form_pg_class; ...@@ -92,7 +94,7 @@ typedef FormData_pg_class *Form_pg_class;
* ---------------- * ----------------
*/ */
#define Natts_pg_class 26 #define Natts_pg_class 27
#define Anum_pg_class_relname 1 #define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2 #define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3 #define Anum_pg_class_reltype 3
...@@ -103,22 +105,23 @@ typedef FormData_pg_class *Form_pg_class; ...@@ -103,22 +105,23 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_reltablespace 8 #define Anum_pg_class_reltablespace 8
#define Anum_pg_class_relpages 9 #define Anum_pg_class_relpages 9
#define Anum_pg_class_reltuples 10 #define Anum_pg_class_reltuples 10
#define Anum_pg_class_reltoastrelid 11 #define Anum_pg_class_relallvisible 11
#define Anum_pg_class_reltoastidxid 12 #define Anum_pg_class_reltoastrelid 12
#define Anum_pg_class_relhasindex 13 #define Anum_pg_class_reltoastidxid 13
#define Anum_pg_class_relisshared 14 #define Anum_pg_class_relhasindex 14
#define Anum_pg_class_relpersistence 15 #define Anum_pg_class_relisshared 15
#define Anum_pg_class_relkind 16 #define Anum_pg_class_relpersistence 16
#define Anum_pg_class_relnatts 17 #define Anum_pg_class_relkind 17
#define Anum_pg_class_relchecks 18 #define Anum_pg_class_relnatts 18
#define Anum_pg_class_relhasoids 19 #define Anum_pg_class_relchecks 19
#define Anum_pg_class_relhaspkey 20 #define Anum_pg_class_relhasoids 20
#define Anum_pg_class_relhasrules 21 #define Anum_pg_class_relhaspkey 21
#define Anum_pg_class_relhastriggers 22 #define Anum_pg_class_relhasrules 22
#define Anum_pg_class_relhassubclass 23 #define Anum_pg_class_relhastriggers 23
#define Anum_pg_class_relfrozenxid 24 #define Anum_pg_class_relhassubclass 24
#define Anum_pg_class_relacl 25 #define Anum_pg_class_relfrozenxid 25
#define Anum_pg_class_reloptions 26 #define Anum_pg_class_relacl 26
#define Anum_pg_class_reloptions 27
/* ---------------- /* ----------------
* initial contents of pg_class * initial contents of pg_class
...@@ -130,13 +133,13 @@ typedef FormData_pg_class *Form_pg_class; ...@@ -130,13 +133,13 @@ typedef FormData_pg_class *Form_pg_class;
*/ */
/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */ /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ )); DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 29 0 t f f f f 3 _null_ _null_ ));
DESCR(""); DESCR("");
DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ )); DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
DESCR(""); DESCR("");
DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ )); DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ ));
DESCR(""); DESCR("");
DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 26 0 t f f f f 3 _null_ _null_ )); DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
DESCR(""); DESCR("");
......
...@@ -149,6 +149,7 @@ extern double vac_estimate_reltuples(Relation relation, bool is_analyze, ...@@ -149,6 +149,7 @@ extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
extern void vac_update_relstats(Relation relation, extern void vac_update_relstats(Relation relation,
BlockNumber num_pages, BlockNumber num_pages,
double num_tuples, double num_tuples,
BlockNumber num_all_visible_pages,
bool hasindex, bool hasindex,
TransactionId frozenxid); TransactionId frozenxid);
extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age, extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
......
...@@ -319,6 +319,7 @@ typedef struct PlannerInfo ...@@ -319,6 +319,7 @@ typedef struct PlannerInfo
* (always NIL if it's not a table) * (always NIL if it's not a table)
* pages - number of disk pages in relation (zero if not a table) * pages - number of disk pages in relation (zero if not a table)
* tuples - number of tuples in relation (not considering restrictions) * tuples - number of tuples in relation (not considering restrictions)
* allvisfrac - fraction of disk pages that are marked all-visible
* subplan - plan for subquery (NULL if it's not a subquery) * subplan - plan for subquery (NULL if it's not a subquery)
* subroot - PlannerInfo for subquery (NULL if it's not a subquery) * subroot - PlannerInfo for subquery (NULL if it's not a subquery)
* *
...@@ -402,8 +403,9 @@ typedef struct RelOptInfo ...@@ -402,8 +403,9 @@ typedef struct RelOptInfo
Relids *attr_needed; /* array indexed [min_attr .. max_attr] */ Relids *attr_needed; /* array indexed [min_attr .. max_attr] */
int32 *attr_widths; /* array indexed [min_attr .. max_attr] */ int32 *attr_widths; /* array indexed [min_attr .. max_attr] */
List *indexlist; /* list of IndexOptInfo */ List *indexlist; /* list of IndexOptInfo */
BlockNumber pages; BlockNumber pages; /* size estimates derived from pg_class */
double tuples; double tuples;
double allvisfrac;
struct Plan *subplan; /* if subquery */ struct Plan *subplan; /* if subquery */
PlannerInfo *subroot; /* if subquery */ PlannerInfo *subroot; /* if subquery */
......
...@@ -29,7 +29,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId, ...@@ -29,7 +29,7 @@ extern void get_relation_info(PlannerInfo *root, Oid relationObjectId,
bool inhparent, RelOptInfo *rel); bool inhparent, RelOptInfo *rel);
extern void estimate_rel_size(Relation rel, int32 *attr_widths, extern void estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples); BlockNumber *pages, double *tuples, double *allvisfrac);
extern int32 get_relation_data_width(Oid relid, int32 *attr_widths); extern int32 get_relation_data_width(Oid relid, int32 *attr_widths);
......
...@@ -901,21 +901,22 @@ WINDOW w AS (order by four range between current row and unbounded following); ...@@ -901,21 +901,22 @@ WINDOW w AS (order by four range between current row and unbounded following);
(10 rows) (10 rows)
SELECT sum(unique1) over SELECT sum(unique1) over
(rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), (order by unique1
rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
unique1 unique1
FROM tenk1 WHERE unique1 < 10; FROM tenk1 WHERE unique1 < 10;
sum | unique1 sum | unique1
-----+--------- -----+---------
4 | 4 0 | 0
6 | 2 1 | 1
3 | 1 3 | 2
7 | 6 5 | 3
15 | 9 7 | 4
17 | 8 9 | 5
13 | 5 11 | 6
8 | 3 13 | 7
10 | 7 15 | 8
7 | 0 17 | 9
(10 rows) (10 rows)
CREATE TEMP VIEW v_window AS CREATE TEMP VIEW v_window AS
......
...@@ -211,7 +211,8 @@ FROM tenk1 WHERE unique1 < 10 ...@@ -211,7 +211,8 @@ FROM tenk1 WHERE unique1 < 10
WINDOW w AS (order by four range between current row and unbounded following); WINDOW w AS (order by four range between current row and unbounded following);
SELECT sum(unique1) over SELECT sum(unique1) over
(rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING), (order by unique1
rows (SELECT unique1 FROM tenk1 ORDER BY unique1 LIMIT 1) + 1 PRECEDING),
unique1 unique1
FROM tenk1 WHERE unique1 < 10; FROM tenk1 WHERE unique1 < 10;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment