Commit 554106b1 authored by Robert Haas

tableam: Provide helper functions for relation sizing.

Most block-based table AMs will need the exact same implementation of
the relation_size callback as the heap, and if they use a standard
page layout, they will likely need an implementation of the
relation_estimate_size callback that is very similar to that of the
heap.  Rearrange to facilitate code reuse.

Patch by me, reviewed by Michael Paquier, Daniel Gustafsson, and
Álvaro Herrera.

parent 482501d4
......@@ -19,8 +19,6 @@
#include "postgres.h"
#include <math.h>
#include "miscadmin.h"
#include "access/genam.h"
......@@ -37,7 +35,6 @@
#include "catalog/storage_xlog.h"
#include "commands/progress.h"
#include "executor/executor.h"
#include "optimizer/plancat.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
......@@ -1991,26 +1988,6 @@ heapam_scan_get_blocks_done(HeapScanDesc hscan)
* ------------------------------------------------------------------------
static uint64
heapam_relation_size(Relation rel, ForkNumber forkNumber)
uint64 nblocks = 0;
/* Open it at the smgr level if not already done */
/* InvalidForkNumber indicates returning the size for all forks */
if (forkNumber == InvalidForkNumber)
for (int i = 0; i < MAX_FORKNUM; i++)
nblocks += smgrnblocks(rel->rd_smgr, i);
nblocks = smgrnblocks(rel->rd_smgr, forkNumber);
return nblocks * BLCKSZ;
* Check to see whether the table needs a TOAST table. It does only if
* (1) there are any toastable attributes, and (2) the maximum length
......@@ -2068,106 +2045,20 @@ heapam_relation_needs_toast_table(Relation rel)
* ------------------------------------------------------------------------
(MAXALIGN(SizeofHeapTupleHeader) + sizeof(ItemIdData))
(BLCKSZ - SizeOfPageHeaderData)
static void
heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples,
double *allvisfrac)
BlockNumber curpages;
BlockNumber relpages;
double reltuples;
BlockNumber relallvisible;
double density;
/* it has storage, ok to call the smgr */
curpages = RelationGetNumberOfBlocks(rel);
/* coerce values in pg_class to more desirable types */
relpages = (BlockNumber) rel->rd_rel->relpages;
reltuples = (double) rel->rd_rel->reltuples;
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
* HACK: if the relation has never yet been vacuumed, use a minimum size
* estimate of 10 pages. The idea here is to avoid assuming a
* newly-created table is really small, even if it currently is, because
* that may not be true once some data gets loaded into it. Once a vacuum
* or analyze cycle has been done on it, it's more reasonable to believe
* the size is somewhat stable.
* (Note that this is only an issue if the plan gets cached and used again
* after the table has been filled. What we're trying to avoid is using a
* nestloop-type plan on a table that has grown substantially since the
* plan was made. Normally, autovacuum/autoanalyze will occur once enough
* inserts have happened and cause cached-plan invalidation; but that
* doesn't happen instantaneously, and it won't happen at all for cases
* such as temporary tables.)
* We approximate "never vacuumed" by "has relpages = 0", which means this
* will also fire on genuinely empty relations. Not great, but
* fortunately that's a seldom-seen case in the real world, and it
* shouldn't degrade the quality of the plan too much anyway to err in
* this direction.
* If the table has inheritance children, we don't apply this heuristic.
* Totally empty parent tables are quite common, so we should be willing
* to believe that they are empty.
if (curpages < 10 &&
relpages == 0 &&
curpages = 10;
/* report estimated # pages */
*pages = curpages;
/* quick exit if rel is clearly empty */
if (curpages == 0)
*tuples = 0;
*allvisfrac = 0;
/* estimate number of tuples from previous tuple density */
if (relpages > 0)
density = reltuples / (double) relpages;
* When we have no data because the relation was truncated, estimate
* tuple width from attribute datatypes. We assume here that the
* pages are completely full, which is OK for tables (since they've
* presumably not been VACUUMed yet) but is probably an overestimate
* for indexes. Fortunately get_relation_info() can clamp the
* overestimate to the parent table's size.
* Note: this code intentionally disregards alignment considerations,
* because (a) that would be gilding the lily considering how crude
* the estimate is, and (b) it creates platform dependencies in the
* default plans which are kind of a headache for regression testing.
int32 tuple_width;
tuple_width = get_rel_data_width(rel, attr_widths);
tuple_width += MAXALIGN(SizeofHeapTupleHeader);
tuple_width += sizeof(ItemIdData);
/* note: integer division is intentional here */
density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
*tuples = rint(density * (double) curpages);
* We use relallvisible as-is, rather than scaling it up like we do for
* the pages and tuples counts, on the theory that any pages added since
* the last VACUUM are most likely not marked all-visible. But costsize.c
* wants it converted to a fraction.
if (relallvisible == 0 || curpages <= 0)
*allvisfrac = 0;
else if ((double) relallvisible >= curpages)
*allvisfrac = 1;
*allvisfrac = (double) relallvisible / curpages;
table_block_relation_estimate_size(rel, attr_widths, pages,
tuples, allvisfrac,
......@@ -2644,7 +2535,7 @@ static const TableAmRoutine heapam_methods = {
.index_build_range_scan = heapam_index_build_range_scan,
.index_validate_scan = heapam_index_validate_scan,
.relation_size = heapam_relation_size,
.relation_size = table_block_relation_size,
.relation_needs_toast_table = heapam_relation_needs_toast_table,
.relation_estimate_size = heapam_estimate_rel_size,
......@@ -19,11 +19,15 @@
#include "postgres.h"
#include <math.h>
#include "access/heapam.h" /* for ss_* */
#include "access/tableam.h"
#include "access/xact.h"
#include "optimizer/plancat.h"
#include "storage/bufmgr.h"
#include "storage/shmem.h"
#include "storage/smgr.h"
/* GUC variables */
......@@ -486,3 +490,160 @@ table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbsca
return page;
/* ----------------------------------------------------------------------------
* Helper functions to implement relation sizing for block oriented AMs.
* ----------------------------------------------------------------------------
* table_block_relation_size
* If a table AM uses the various relation forks as the sole place where data
* is stored, and if it uses them in the expected manner (e.g. the actual data
* is in the main fork rather than some other), it can use this implementation
* of the relation_size callback rather than implementing its own.
table_block_relation_size(Relation rel, ForkNumber forkNumber)
uint64 nblocks = 0;
/* Open it at the smgr level if not already done */
/* InvalidForkNumber indicates returning the size for all forks */
if (forkNumber == InvalidForkNumber)
for (int i = 0; i < MAX_FORKNUM; i++)
nblocks += smgrnblocks(rel->rd_smgr, i);
nblocks = smgrnblocks(rel->rd_smgr, forkNumber);
return nblocks * BLCKSZ;
* table_block_relation_estimate_size
* This function can't be directly used as the implementation of the
* relation_estimate_size callback, because it has a few additional parameters.
* Instead, it is intended to be used as a helper function; the caller can
* pass through the arguments to its relation_estimate_size function plus the
* additional values required here.
* overhead_bytes_per_tuple should contain the approximate number of bytes
* of storage required to store a tuple above and beyond what is required for
* the tuple data proper. Typically, this would include things like the
* size of the tuple header and item pointer. This is only used for query
* planning, so a table AM where the value is not constant could choose to
* pass a "best guess".
* usable_bytes_per_page should contain the approximate number of bytes per
* page usable for tuple data, excluding the page header and any anticipated
* special space.
table_block_relation_estimate_size(Relation rel, int32 *attr_widths,
BlockNumber *pages, double *tuples,
double *allvisfrac,
Size overhead_bytes_per_tuple,
Size usable_bytes_per_page)
BlockNumber curpages;
BlockNumber relpages;
double reltuples;
BlockNumber relallvisible;
double density;
/* it should have storage, so we can call the smgr */
curpages = RelationGetNumberOfBlocks(rel);
/* coerce values in pg_class to more desirable types */
relpages = (BlockNumber) rel->rd_rel->relpages;
reltuples = (double) rel->rd_rel->reltuples;
relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
* HACK: if the relation has never yet been vacuumed, use a minimum size
* estimate of 10 pages. The idea here is to avoid assuming a
* newly-created table is really small, even if it currently is, because
* that may not be true once some data gets loaded into it. Once a vacuum
* or analyze cycle has been done on it, it's more reasonable to believe
* the size is somewhat stable.
* (Note that this is only an issue if the plan gets cached and used again
* after the table has been filled. What we're trying to avoid is using a
* nestloop-type plan on a table that has grown substantially since the
* plan was made. Normally, autovacuum/autoanalyze will occur once enough
* inserts have happened and cause cached-plan invalidation; but that
* doesn't happen instantaneously, and it won't happen at all for cases
* such as temporary tables.)
* We approximate "never vacuumed" by "has relpages = 0", which means this
* will also fire on genuinely empty relations. Not great, but
* fortunately that's a seldom-seen case in the real world, and it
* shouldn't degrade the quality of the plan too much anyway to err in
* this direction.
* If the table has inheritance children, we don't apply this heuristic.
* Totally empty parent tables are quite common, so we should be willing
* to believe that they are empty.
if (curpages < 10 &&
relpages == 0 &&
curpages = 10;
/* report estimated # pages */
*pages = curpages;
/* quick exit if rel is clearly empty */
if (curpages == 0)
*tuples = 0;
*allvisfrac = 0;
/* estimate number of tuples from previous tuple density */
if (relpages > 0)
density = reltuples / (double) relpages;
* When we have no data because the relation was truncated, estimate
* tuple width from attribute datatypes. We assume here that the
* pages are completely full, which is OK for tables (since they've
* presumably not been VACUUMed yet) but is probably an overestimate
* for indexes. Fortunately get_relation_info() can clamp the
* overestimate to the parent table's size.
* Note: this code intentionally disregards alignment considerations,
* because (a) that would be gilding the lily considering how crude
* the estimate is, (b) it creates platform dependencies in the
* default plans which are kind of a headache for regression testing,
* and (c) different table AMs might use different padding schemes.
int32 tuple_width;
tuple_width = get_rel_data_width(rel, attr_widths);
tuple_width += overhead_bytes_per_tuple;
/* note: integer division is intentional here */
density = usable_bytes_per_page / tuple_width;
*tuples = rint(density * (double) curpages);
* We use relallvisible as-is, rather than scaling it up like we do for
* the pages and tuples counts, on the theory that any pages added since
* the last VACUUM are most likely not marked all-visible. But costsize.c
* wants it converted to a fraction.
if (relallvisible == 0 || curpages <= 0)
*allvisfrac = 0;
else if ((double) relallvisible >= curpages)
*allvisfrac = 1;
*allvisfrac = (double) relallvisible / curpages;
......@@ -1727,6 +1727,20 @@ extern void table_block_parallelscan_startblock_init(Relation rel,
ParallelBlockTableScanDesc pbscan);
/* ----------------------------------------------------------------------------
* Helper functions to implement relation sizing for block oriented AMs.
* ----------------------------------------------------------------------------
extern uint64 table_block_relation_size(Relation rel, ForkNumber forkNumber);
extern void table_block_relation_estimate_size(Relation rel,
int32 *attr_widths,
BlockNumber *pages,
double *tuples,
double *allvisfrac,
Size overhead_bytes_per_tuple,
Size usable_bytes_per_page);
/* ----------------------------------------------------------------------------
* Functions in tableamapi.c
* ----------------------------------------------------------------------------
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment