Commit b4b6923e authored by Tom Lane

Fix VACUUM so that it always updates pg_class.reltuples/relpages.

When we added the ability for vacuum to skip heap pages by consulting the
visibility map, we made it just not update the reltuples/relpages
statistics if it skipped any pages.  But this could leave us with extremely
out-of-date stats for a table that contains any unchanging areas,
especially for TOAST tables which never get processed by ANALYZE.  In
particular this could result in autovacuum making poor decisions about when
to process the table, as in a recent report from Florian Helmberger.  And in
general it's a bad idea to not update the stats at all.  Instead, use the
previous values of reltuples/relpages as an estimate of the tuple density
in unvisited pages.  This approach results in a "moving average" estimate
of reltuples, which should converge to the correct value over multiple
VACUUM and ANALYZE cycles even when individual measurements aren't very
good.

This new method for updating reltuples is used by both VACUUM and ANALYZE,
with the result that we no longer need the grotty interconnections that
caused ANALYZE to not update the stats depending on what had happened
in the parent VACUUM command.

Also, fix the logic for skipping all-visible pages during VACUUM so that it
looks ahead rather than behind to decide what to do, as per a suggestion
from Greg Stark.  This eliminates useless scanning of all-visible pages at
the start of the relation or just after a not-all-visible page.  In
particular, the first few pages of the relation will not be invariably
included in the scanned pages, which seems to help in not overweighting
them in the reltuples estimate.

Back-patch to 8.4, where the visibility map was introduced.
parent 3001b763
...@@ -84,8 +84,7 @@ static MemoryContext anl_context = NULL; ...@@ -84,8 +84,7 @@ static MemoryContext anl_context = NULL;
static BufferAccessStrategy vac_strategy; static BufferAccessStrategy vac_strategy;
static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, static void do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh);
bool update_reltuples, bool inh);
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
int samplesize); int samplesize);
static bool BlockSampler_HasMore(BlockSampler bs); static bool BlockSampler_HasMore(BlockSampler bs);
...@@ -115,18 +114,9 @@ static bool std_typanalyze(VacAttrStats *stats); ...@@ -115,18 +114,9 @@ static bool std_typanalyze(VacAttrStats *stats);
/* /*
* analyze_rel() -- analyze one relation * analyze_rel() -- analyze one relation
*
* If update_reltuples is true, we update reltuples and relpages columns
* in pg_class. Caller should pass false if we're part of VACUUM ANALYZE,
* and the VACUUM didn't skip any pages. We only have an approximate count,
* so we don't want to overwrite the accurate values already inserted by the
* VACUUM in that case. VACUUM always scans all indexes, however, so the
* pg_class entries for indexes are never updated if we're part of VACUUM
* ANALYZE.
*/ */
void void
analyze_rel(Oid relid, VacuumStmt *vacstmt, analyze_rel(Oid relid, VacuumStmt *vacstmt, BufferAccessStrategy bstrategy)
BufferAccessStrategy bstrategy, bool update_reltuples)
{ {
Relation onerel; Relation onerel;
...@@ -238,13 +228,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, ...@@ -238,13 +228,13 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
/* /*
* Do the normal non-recursive ANALYZE. * Do the normal non-recursive ANALYZE.
*/ */
do_analyze_rel(onerel, vacstmt, update_reltuples, false); do_analyze_rel(onerel, vacstmt, false);
/* /*
* If there are child tables, do recursive ANALYZE. * If there are child tables, do recursive ANALYZE.
*/ */
if (onerel->rd_rel->relhassubclass) if (onerel->rd_rel->relhassubclass)
do_analyze_rel(onerel, vacstmt, false, true); do_analyze_rel(onerel, vacstmt, true);
/* /*
* Close source relation now, but keep lock so that no one deletes it * Close source relation now, but keep lock so that no one deletes it
...@@ -267,8 +257,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt, ...@@ -267,8 +257,7 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
* do_analyze_rel() -- analyze one relation, recursively or not * do_analyze_rel() -- analyze one relation, recursively or not
*/ */
static void static void
do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
bool update_reltuples, bool inh)
{ {
int attr_cnt, int attr_cnt,
tcnt, tcnt,
...@@ -437,9 +426,9 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -437,9 +426,9 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
} }
/* /*
* Quit if no analyzable columns and no pg_class update needed. * Quit if no analyzable columns.
*/ */
if (attr_cnt <= 0 && !analyzableindex && !update_reltuples) if (attr_cnt <= 0 && !analyzableindex)
goto cleanup; goto cleanup;
/* /*
...@@ -549,10 +538,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -549,10 +538,10 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
} }
/* /*
* Update pages/tuples stats in pg_class, but not if we're inside a VACUUM * Update pages/tuples stats in pg_class ... but not if we're doing
* that got a more precise number. * inherited stats.
*/ */
if (update_reltuples) if (!inh)
vac_update_relstats(onerel, vac_update_relstats(onerel,
RelationGetNumberOfBlocks(onerel), RelationGetNumberOfBlocks(onerel),
totalrows, hasindex, InvalidTransactionId); totalrows, hasindex, InvalidTransactionId);
...@@ -562,7 +551,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -562,7 +551,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
* VACUUM ANALYZE, don't overwrite the accurate count already inserted by * VACUUM ANALYZE, don't overwrite the accurate count already inserted by
* VACUUM. * VACUUM.
*/ */
if (!(vacstmt->options & VACOPT_VACUUM)) if (!inh && !(vacstmt->options & VACOPT_VACUUM))
{ {
for (ind = 0; ind < nindexes; ind++) for (ind = 0; ind < nindexes; ind++)
{ {
...@@ -577,13 +566,12 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -577,13 +566,12 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt,
} }
/* /*
* Report ANALYZE to the stats collector, too; likewise, tell it to adopt * Report ANALYZE to the stats collector, too. However, if doing
* these numbers only if we're not inside a VACUUM that got a better * inherited stats we shouldn't report, because the stats collector only
* number. However, a call with inh = true shouldn't reset the stats. * tracks per-table stats.
*/ */
if (!inh) if (!inh)
pgstat_report_analyze(onerel, update_reltuples, pgstat_report_analyze(onerel, totalrows, totaldeadrows);
totalrows, totaldeadrows);
/* We skip to here if there were no analyzable columns */ /* We skip to here if there were no analyzable columns */
cleanup: cleanup:
...@@ -1243,18 +1231,19 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, ...@@ -1243,18 +1231,19 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows); qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
/* /*
* Estimate total numbers of rows in relation. * Estimate total numbers of rows in relation. For live rows, use
* vac_estimate_reltuples; for dead rows, we have no source of old
* information, so we have to assume the density is the same in unseen
* pages as in the pages we scanned.
*/ */
*totalrows = vac_estimate_reltuples(onerel, true,
totalblocks,
bs.m,
liverows);
if (bs.m > 0) if (bs.m > 0)
{ *totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
*totalrows = floor((liverows * totalblocks) / bs.m + 0.5);
*totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5);
}
else else
{
*totalrows = 0.0;
*totaldeadrows = 0.0; *totaldeadrows = 0.0;
}
/* /*
* Emit some interesting relation info * Emit some interesting relation info
......
...@@ -20,6 +20,8 @@ ...@@ -20,6 +20,8 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include <math.h>
#include "access/clog.h" #include "access/clog.h"
#include "access/genam.h" #include "access/genam.h"
#include "access/heapam.h" #include "access/heapam.h"
...@@ -62,7 +64,7 @@ static BufferAccessStrategy vac_strategy; ...@@ -62,7 +64,7 @@ static BufferAccessStrategy vac_strategy;
static List *get_rel_oids(Oid relid, const RangeVar *vacrel); static List *get_rel_oids(Oid relid, const RangeVar *vacrel);
static void vac_truncate_clog(TransactionId frozenXID); static void vac_truncate_clog(TransactionId frozenXID);
static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, static bool vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast,
bool for_wraparound, bool *scanned_all); bool for_wraparound);
/* /*
...@@ -219,12 +221,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, ...@@ -219,12 +221,10 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
foreach(cur, relations) foreach(cur, relations)
{ {
Oid relid = lfirst_oid(cur); Oid relid = lfirst_oid(cur);
bool scanned_all = false;
if (vacstmt->options & VACOPT_VACUUM) if (vacstmt->options & VACOPT_VACUUM)
{ {
if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound, if (!vacuum_rel(relid, vacstmt, do_toast, for_wraparound))
&scanned_all))
continue; continue;
} }
...@@ -241,7 +241,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, ...@@ -241,7 +241,7 @@ vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
PushActiveSnapshot(GetTransactionSnapshot()); PushActiveSnapshot(GetTransactionSnapshot());
} }
analyze_rel(relid, vacstmt, vac_strategy, !scanned_all); analyze_rel(relid, vacstmt, vac_strategy);
if (use_own_xacts) if (use_own_xacts)
{ {
...@@ -453,6 +453,79 @@ vacuum_set_xid_limits(int freeze_min_age, ...@@ -453,6 +453,79 @@ vacuum_set_xid_limits(int freeze_min_age,
} }
/*
 * vac_estimate_reltuples() -- work out the value to store in
 *		pg_class.reltuples after a complete or partial scan
 *
 * relation:		the relation being processed; its cached pg_class row
 *					supplies the previous relpages/reltuples values
 * is_analyze:		caller identification flag; the computation below does
 *					not consult it (both callers get the same formula)
 * total_pages:		current number of pages in the relation
 * scanned_pages:	number of those pages that were actually examined
 * scanned_tuples:	number of live tuples counted on the examined pages
 *
 * Returns the new tuple-count estimate.  A full scan yields the counted
 * value unchanged.  A partial scan cannot be trusted on its own (VACUUM in
 * particular may have looked at a decidedly nonrandom set of pages), so the
 * previous pg_class.reltuples is taken as describing the tuple density of
 * the pages that were not visited.
 *
 * This routine is shared by VACUUM and ANALYZE.
 */
double
vac_estimate_reltuples(Relation relation, bool is_analyze,
					   BlockNumber total_pages,
					   BlockNumber scanned_pages,
					   double scanned_tuples)
{
	BlockNumber prev_pages = relation->rd_rel->relpages;
	double		prev_tuples = relation->rd_rel->reltuples;
	double		prev_density;
	double		seen_density;
	double		scan_fraction;
	double		blended_density;

	if (scanned_pages >= total_pages)
	{
		/* Every page was examined, so the count is exact: use it as-is */
		return scanned_tuples;
	}
	else if (scanned_pages == 0)
	{
		/*
		 * Nothing was examined although the relation has pages; there is
		 * no new information, so the existing reltuples value stands.
		 */
		return prev_tuples;
	}
	else if (prev_pages == 0)
	{
		/*
		 * With old relpages at zero the old density cannot be computed.
		 * All we can do is extrapolate the density of the scanned pages
		 * across the whole relation.
		 */
		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
	}

	/*
	 * General case: both an old and a new density (tuples per page) are
	 * available.  Move the old density towards the new one by a step
	 * proportional to the fraction of the table that was scanned, then
	 * scale the result back up to total_pages.
	 *
	 * Weighting by the scanned fraction is the same as assuming the
	 * unscanned pages still hold tuples at the old density.  That is
	 * probably not exactly true when the new measurement differs, but
	 * repeated cycles reporting the same new density pull reltuples
	 * towards the right answer.
	 *
	 * For VACUUM the scanned pages are not a random sample; however, the
	 * pages it skipped are precisely those that have not changed lately,
	 * which is a fair argument for treating them exactly as ANALYZE does,
	 * i.e. at the old density.
	 *
	 * This logic could probably use further refinement.
	 */
	prev_density = prev_tuples / prev_pages;
	seen_density = scanned_tuples / scanned_pages;
	scan_fraction = (double) scanned_pages / (double) total_pages;
	blended_density = prev_density + (seen_density - prev_density) * scan_fraction;

	return floor(blended_density * total_pages + 0.5);
}
/* /*
* vac_update_relstats() -- update statistics for one relation * vac_update_relstats() -- update statistics for one relation
* *
...@@ -480,7 +553,7 @@ vacuum_set_xid_limits(int freeze_min_age, ...@@ -480,7 +553,7 @@ vacuum_set_xid_limits(int freeze_min_age,
* somebody vacuuming pg_class might think they could delete a tuple * somebody vacuuming pg_class might think they could delete a tuple
* marked with xmin = our xid. * marked with xmin = our xid.
* *
* This routine is shared by VACUUM and stand-alone ANALYZE. * This routine is shared by VACUUM and ANALYZE.
*/ */
void void
vac_update_relstats(Relation relation, vac_update_relstats(Relation relation,
...@@ -758,14 +831,10 @@ vac_truncate_clog(TransactionId frozenXID) ...@@ -758,14 +831,10 @@ vac_truncate_clog(TransactionId frozenXID)
* many small transactions. Otherwise, two-phase locking would require * many small transactions. Otherwise, two-phase locking would require
* us to lock the entire database during one pass of the vacuum cleaner. * us to lock the entire database during one pass of the vacuum cleaner.
* *
* We'll return true in *scanned_all if the vacuum scanned all heap
* pages, and updated pg_class.
*
* At entry and exit, we are not inside a transaction. * At entry and exit, we are not inside a transaction.
*/ */
static bool static bool
vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound)
bool *scanned_all)
{ {
LOCKMODE lmode; LOCKMODE lmode;
Relation onerel; Relation onerel;
...@@ -775,9 +844,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, ...@@ -775,9 +844,6 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
int save_sec_context; int save_sec_context;
int save_nestlevel; int save_nestlevel;
if (scanned_all)
*scanned_all = false;
/* Begin a transaction for vacuuming this relation */ /* Begin a transaction for vacuuming this relation */
StartTransactionCommand(); StartTransactionCommand();
...@@ -971,7 +1037,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, ...@@ -971,7 +1037,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
vacstmt->freeze_min_age, vacstmt->freeze_table_age); vacstmt->freeze_min_age, vacstmt->freeze_table_age);
} }
else else
lazy_vacuum_rel(onerel, vacstmt, vac_strategy, scanned_all); lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
/* Roll back any GUC changes executed by index functions */ /* Roll back any GUC changes executed by index functions */
AtEOXact_GUC(false, save_nestlevel); AtEOXact_GUC(false, save_nestlevel);
...@@ -997,7 +1063,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound, ...@@ -997,7 +1063,7 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, bool do_toast, bool for_wraparound,
* totally unimportant for toast relations. * totally unimportant for toast relations.
*/ */
if (toast_relid != InvalidOid) if (toast_relid != InvalidOid)
vacuum_rel(toast_relid, vacstmt, false, for_wraparound, NULL); vacuum_rel(toast_relid, vacstmt, false, for_wraparound);
/* /*
* Now release the session-level lock on the master table. * Now release the session-level lock on the master table.
......
...@@ -77,17 +77,18 @@ ...@@ -77,17 +77,18 @@
* Before we consider skipping a page that's marked as clean in * Before we consider skipping a page that's marked as clean in
* visibility map, we must've seen at least this many clean pages. * visibility map, we must've seen at least this many clean pages.
*/ */
#define SKIP_PAGES_THRESHOLD 32 #define SKIP_PAGES_THRESHOLD ((BlockNumber) 32)
typedef struct LVRelStats typedef struct LVRelStats
{ {
/* hasindex = true means two-pass strategy; false means one-pass */ /* hasindex = true means two-pass strategy; false means one-pass */
bool hasindex; bool hasindex;
bool scanned_all; /* have we scanned all pages (this far)? */
/* Overall statistics about rel */ /* Overall statistics about rel */
BlockNumber rel_pages; BlockNumber rel_pages; /* total number of pages */
BlockNumber scanned_pages; /* number of pages we examined */
double scanned_tuples; /* counts only tuples on scanned pages */
double old_rel_tuples; /* previous value of pg_class.reltuples */ double old_rel_tuples; /* previous value of pg_class.reltuples */
double rel_tuples; /* counts only tuples on scanned pages */ double new_rel_tuples; /* new estimated total # of tuples */
BlockNumber pages_removed; BlockNumber pages_removed;
double tuples_deleted; double tuples_deleted;
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
...@@ -143,7 +144,7 @@ static int vac_cmp_itemptr(const void *left, const void *right); ...@@ -143,7 +144,7 @@ static int vac_cmp_itemptr(const void *left, const void *right);
*/ */
void void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy, bool *scanned_all) BufferAccessStrategy bstrategy)
{ {
LVRelStats *vacrelstats; LVRelStats *vacrelstats;
Relation *Irel; Relation *Irel;
...@@ -175,7 +176,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -175,7 +176,6 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats)); vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));
vacrelstats->scanned_all = true; /* will be cleared if we skip a page */
vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples; vacrelstats->old_rel_tuples = onerel->rd_rel->reltuples;
vacrelstats->num_index_scans = 0; vacrelstats->num_index_scans = 0;
...@@ -205,24 +205,20 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -205,24 +205,20 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
FreeSpaceMapVacuum(onerel); FreeSpaceMapVacuum(onerel);
/* /*
* Update statistics in pg_class. But only if we didn't skip any pages; * Update statistics in pg_class. But don't change relfrozenxid if we
* the tuple count only includes tuples from the pages we've visited, and * skipped any pages.
* we haven't frozen tuples in unvisited pages either. The page count is
* accurate in any case, but because we use the reltuples / relpages ratio
* in the planner, it's better to not update relpages either if we can't
* update reltuples.
*/ */
if (vacrelstats->scanned_all)
vac_update_relstats(onerel, vac_update_relstats(onerel,
vacrelstats->rel_pages, vacrelstats->rel_tuples, vacrelstats->rel_pages, vacrelstats->new_rel_tuples,
vacrelstats->hasindex, vacrelstats->hasindex,
(vacrelstats->scanned_pages < vacrelstats->rel_pages) ?
InvalidTransactionId :
FreezeLimit); FreezeLimit);
/* report results to the stats collector, too */ /* report results to the stats collector, too */
pgstat_report_vacuum(RelationGetRelid(onerel), pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared, onerel->rd_rel->relisshared,
vacrelstats->scanned_all, vacrelstats->new_rel_tuples);
vacrelstats->rel_tuples);
/* and log the action if appropriate */ /* and log the action if appropriate */
if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0) if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
...@@ -239,13 +235,12 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, ...@@ -239,13 +235,12 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
get_namespace_name(RelationGetNamespace(onerel)), get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel), RelationGetRelationName(onerel),
vacrelstats->num_index_scans, vacrelstats->num_index_scans,
vacrelstats->pages_removed, vacrelstats->rel_pages, vacrelstats->pages_removed,
vacrelstats->tuples_deleted, vacrelstats->rel_tuples, vacrelstats->rel_pages,
vacrelstats->tuples_deleted,
vacrelstats->new_rel_tuples,
pg_rusage_show(&ru0)))); pg_rusage_show(&ru0))));
} }
if (scanned_all)
*scanned_all = vacrelstats->scanned_all;
} }
/* /*
...@@ -301,7 +296,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -301,7 +296,6 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
HeapTupleData tuple; HeapTupleData tuple;
char *relname; char *relname;
BlockNumber empty_pages, BlockNumber empty_pages,
scanned_pages,
vacuumed_pages; vacuumed_pages;
double num_tuples, double num_tuples,
tups_vacuumed, tups_vacuumed,
...@@ -311,7 +305,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -311,7 +305,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
int i; int i;
PGRUsage ru0; PGRUsage ru0;
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
BlockNumber all_visible_streak; BlockNumber next_not_all_visible_block;
bool skipping_all_visible_blocks;
pg_rusage_init(&ru0); pg_rusage_init(&ru0);
...@@ -321,7 +316,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -321,7 +316,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
get_namespace_name(RelationGetNamespace(onerel)), get_namespace_name(RelationGetNamespace(onerel)),
relname))); relname)));
empty_pages = vacuumed_pages = scanned_pages = 0; empty_pages = vacuumed_pages = 0;
num_tuples = tups_vacuumed = nkeep = nunused = 0; num_tuples = tups_vacuumed = nkeep = nunused = 0;
indstats = (IndexBulkDeleteResult **) indstats = (IndexBulkDeleteResult **)
...@@ -329,12 +324,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -329,12 +324,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
nblocks = RelationGetNumberOfBlocks(onerel); nblocks = RelationGetNumberOfBlocks(onerel);
vacrelstats->rel_pages = nblocks; vacrelstats->rel_pages = nblocks;
vacrelstats->scanned_pages = 0;
vacrelstats->nonempty_pages = 0; vacrelstats->nonempty_pages = 0;
vacrelstats->latestRemovedXid = InvalidTransactionId; vacrelstats->latestRemovedXid = InvalidTransactionId;
lazy_space_alloc(vacrelstats, nblocks); lazy_space_alloc(vacrelstats, nblocks);
all_visible_streak = 0; /*
* We want to skip pages that don't require vacuuming according to the
* visibility map, but only when we can skip at least SKIP_PAGES_THRESHOLD
* consecutive pages. Since we're reading sequentially, the OS should be
* doing readahead for us, so there's no gain in skipping a page now and
* then; that's likely to disable readahead and so be counterproductive.
* Also, skipping even a single page means that we can't update
* relfrozenxid, so we only want to do it if we can skip a goodly number
* of pages.
*
* Before entering the main loop, establish the invariant that
* next_not_all_visible_block is the next block number >= blkno that's
* not all-visible according to the visibility map, or nblocks if there's
* no such block. Also, we set up the skipping_all_visible_blocks flag,
* which is needed because we need hysteresis in the decision: once we've
* started skipping blocks, we may as well skip everything up to the next
* not-all-visible block.
*
* Note: if scan_all is true, we won't actually skip any pages; but we
* maintain next_not_all_visible_block anyway, so as to set up the
* all_visible_according_to_vm flag correctly for each page.
*/
for (next_not_all_visible_block = 0;
next_not_all_visible_block < nblocks;
next_not_all_visible_block++)
{
if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
break;
vacuum_delay_point();
}
if (next_not_all_visible_block >= SKIP_PAGES_THRESHOLD)
skipping_all_visible_blocks = true;
else
skipping_all_visible_blocks = false;
for (blkno = 0; blkno < nblocks; blkno++) for (blkno = 0; blkno < nblocks; blkno++)
{ {
Buffer buf; Buffer buf;
...@@ -347,41 +377,45 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -347,41 +377,45 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
OffsetNumber frozen[MaxOffsetNumber]; OffsetNumber frozen[MaxOffsetNumber];
int nfrozen; int nfrozen;
Size freespace; Size freespace;
bool all_visible_according_to_vm = false; bool all_visible_according_to_vm;
bool all_visible; bool all_visible;
bool has_dead_tuples; bool has_dead_tuples;
/* if (blkno == next_not_all_visible_block)
* Skip pages that don't require vacuuming according to the visibility
* map. But only if we've seen a streak of at least
* SKIP_PAGES_THRESHOLD pages marked as clean. Since we're reading
* sequentially, the OS should be doing readahead for us and there's
* no gain in skipping a page now and then. You need a longer run of
* consecutive skipped pages before it's worthwhile. Also, skipping
* even a single page means that we can't update relfrozenxid or
* reltuples, so we only want to do it if there's a good chance to
* skip a goodly number of pages.
*/
if (!scan_all)
{
all_visible_according_to_vm =
visibilitymap_test(onerel, blkno, &vmbuffer);
if (all_visible_according_to_vm)
{ {
all_visible_streak++; /* Time to advance next_not_all_visible_block */
if (all_visible_streak >= SKIP_PAGES_THRESHOLD) for (next_not_all_visible_block++;
next_not_all_visible_block < nblocks;
next_not_all_visible_block++)
{ {
vacrelstats->scanned_all = false; if (!visibilitymap_test(onerel, next_not_all_visible_block,
continue; &vmbuffer))
break;
vacuum_delay_point();
} }
/*
* We know we can't skip the current block. But set up
* skipping_all_visible_blocks to do the right thing at the
* following blocks.
*/
if (next_not_all_visible_block - blkno > SKIP_PAGES_THRESHOLD)
skipping_all_visible_blocks = true;
else
skipping_all_visible_blocks = false;
all_visible_according_to_vm = false;
} }
else else
all_visible_streak = 0; {
/* Current block is all-visible */
if (skipping_all_visible_blocks && !scan_all)
continue;
all_visible_according_to_vm = true;
} }
vacuum_delay_point(); vacuum_delay_point();
scanned_pages++; vacrelstats->scanned_pages++;
/* /*
* If we are close to overrunning the available space for dead-tuple * If we are close to overrunning the available space for dead-tuple
...@@ -764,9 +798,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -764,9 +798,15 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
} }
/* save stats for use later */ /* save stats for use later */
vacrelstats->rel_tuples = num_tuples; vacrelstats->scanned_tuples = num_tuples;
vacrelstats->tuples_deleted = tups_vacuumed; vacrelstats->tuples_deleted = tups_vacuumed;
/* now we can compute the new value for pg_class.reltuples */
vacrelstats->new_rel_tuples = vac_estimate_reltuples(onerel, false,
nblocks,
vacrelstats->scanned_pages,
num_tuples);
/* If any tuples need to be deleted, perform final vacuum cycle */ /* If any tuples need to be deleted, perform final vacuum cycle */
/* XXX put a threshold on min number of tuples here? */ /* XXX put a threshold on min number of tuples here? */
if (vacrelstats->num_dead_tuples > 0) if (vacrelstats->num_dead_tuples > 0)
...@@ -805,7 +845,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, ...@@ -805,7 +845,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
ereport(elevel, ereport(elevel,
(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages", (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
RelationGetRelationName(onerel), RelationGetRelationName(onerel),
tups_vacuumed, num_tuples, scanned_pages, nblocks), tups_vacuumed, num_tuples,
vacrelstats->scanned_pages, nblocks),
errdetail("%.0f dead row versions cannot be removed yet.\n" errdetail("%.0f dead row versions cannot be removed yet.\n"
"There were %.0f unused item pointers.\n" "There were %.0f unused item pointers.\n"
"%u pages are entirely empty.\n" "%u pages are entirely empty.\n"
...@@ -977,10 +1018,9 @@ lazy_cleanup_index(Relation indrel, ...@@ -977,10 +1018,9 @@ lazy_cleanup_index(Relation indrel,
ivinfo.index = indrel; ivinfo.index = indrel;
ivinfo.analyze_only = false; ivinfo.analyze_only = false;
ivinfo.estimated_count = !vacrelstats->scanned_all; ivinfo.estimated_count = (vacrelstats->scanned_pages < vacrelstats->rel_pages);
ivinfo.message_level = elevel; ivinfo.message_level = elevel;
/* use rel_tuples only if we scanned all pages, else fall back */ ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
ivinfo.num_heap_tuples = vacrelstats->scanned_all ? vacrelstats->rel_tuples : vacrelstats->old_rel_tuples;
ivinfo.strategy = vac_strategy; ivinfo.strategy = vac_strategy;
stats = index_vacuum_cleanup(&ivinfo, stats); stats = index_vacuum_cleanup(&ivinfo, stats);
...@@ -1041,8 +1081,13 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -1041,8 +1081,13 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
new_rel_pages = RelationGetNumberOfBlocks(onerel); new_rel_pages = RelationGetNumberOfBlocks(onerel);
if (new_rel_pages != old_rel_pages) if (new_rel_pages != old_rel_pages)
{ {
/* might as well use the latest news when we update pg_class stats */ /*
vacrelstats->rel_pages = new_rel_pages; * Note: we intentionally don't update vacrelstats->rel_pages with
* the new rel size here. If we did, it would amount to assuming that
* the new pages are empty, which is unlikely. Leaving the numbers
* alone amounts to assuming that the new pages have the same tuple
* density as existing ones, which is less unlikely.
*/
UnlockRelation(onerel, AccessExclusiveLock); UnlockRelation(onerel, AccessExclusiveLock);
return; return;
} }
...@@ -1076,7 +1121,11 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) ...@@ -1076,7 +1121,11 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
*/ */
UnlockRelation(onerel, AccessExclusiveLock); UnlockRelation(onerel, AccessExclusiveLock);
/* update statistics */ /*
* Update statistics. Here, it *is* correct to adjust rel_pages without
* also touching reltuples, since the tuple count wasn't changed by the
* truncation.
*/
vacrelstats->rel_pages = new_rel_pages; vacrelstats->rel_pages = new_rel_pages;
vacrelstats->pages_removed = old_rel_pages - new_rel_pages; vacrelstats->pages_removed = old_rel_pages - new_rel_pages;
......
...@@ -1246,8 +1246,7 @@ pgstat_report_autovac(Oid dboid) ...@@ -1246,8 +1246,7 @@ pgstat_report_autovac(Oid dboid)
* --------- * ---------
*/ */
void void
pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts, pgstat_report_vacuum(Oid tableoid, bool shared, PgStat_Counter tuples)
PgStat_Counter tuples)
{ {
PgStat_MsgVacuum msg; PgStat_MsgVacuum msg;
...@@ -1257,7 +1256,6 @@ pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts, ...@@ -1257,7 +1256,6 @@ pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM); pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
msg.m_databaseid = shared ? InvalidOid : MyDatabaseId; msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
msg.m_tableoid = tableoid; msg.m_tableoid = tableoid;
msg.m_adopt_counts = adopt_counts;
msg.m_autovacuum = IsAutoVacuumWorkerProcess(); msg.m_autovacuum = IsAutoVacuumWorkerProcess();
msg.m_vacuumtime = GetCurrentTimestamp(); msg.m_vacuumtime = GetCurrentTimestamp();
msg.m_tuples = tuples; msg.m_tuples = tuples;
...@@ -1271,7 +1269,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts, ...@@ -1271,7 +1269,7 @@ pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
* -------- * --------
*/ */
void void
pgstat_report_analyze(Relation rel, bool adopt_counts, pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples) PgStat_Counter livetuples, PgStat_Counter deadtuples)
{ {
PgStat_MsgAnalyze msg; PgStat_MsgAnalyze msg;
...@@ -1308,7 +1306,6 @@ pgstat_report_analyze(Relation rel, bool adopt_counts, ...@@ -1308,7 +1306,6 @@ pgstat_report_analyze(Relation rel, bool adopt_counts,
pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE); pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_ANALYZE);
msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId; msg.m_databaseid = rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId;
msg.m_tableoid = RelationGetRelid(rel); msg.m_tableoid = RelationGetRelid(rel);
msg.m_adopt_counts = adopt_counts;
msg.m_autovacuum = IsAutoVacuumWorkerProcess(); msg.m_autovacuum = IsAutoVacuumWorkerProcess();
msg.m_analyzetime = GetCurrentTimestamp(); msg.m_analyzetime = GetCurrentTimestamp();
msg.m_live_tuples = livetuples; msg.m_live_tuples = livetuples;
...@@ -4197,7 +4194,6 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len) ...@@ -4197,7 +4194,6 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true); tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
if (msg->m_adopt_counts)
tabentry->n_live_tuples = msg->m_tuples; tabentry->n_live_tuples = msg->m_tuples;
/* Resetting dead_tuples to 0 is an approximation ... */ /* Resetting dead_tuples to 0 is an approximation ... */
tabentry->n_dead_tuples = 0; tabentry->n_dead_tuples = 0;
...@@ -4233,11 +4229,8 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len) ...@@ -4233,11 +4229,8 @@ pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len)
tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true); tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
if (msg->m_adopt_counts)
{
tabentry->n_live_tuples = msg->m_live_tuples; tabentry->n_live_tuples = msg->m_live_tuples;
tabentry->n_dead_tuples = msg->m_dead_tuples; tabentry->n_dead_tuples = msg->m_dead_tuples;
}
/* /*
* We reset changes_since_analyze to zero, forgetting any changes that * We reset changes_since_analyze to zero, forgetting any changes that
......
...@@ -142,6 +142,10 @@ extern void vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, ...@@ -142,6 +142,10 @@ extern void vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast,
extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
int *nindexes, Relation **Irel); int *nindexes, Relation **Irel);
extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode); extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
extern double vac_estimate_reltuples(Relation relation, bool is_analyze,
BlockNumber total_pages,
BlockNumber scanned_pages,
double scanned_tuples);
extern void vac_update_relstats(Relation relation, extern void vac_update_relstats(Relation relation,
BlockNumber num_pages, BlockNumber num_pages,
double num_tuples, double num_tuples,
...@@ -157,10 +161,10 @@ extern void vacuum_delay_point(void); ...@@ -157,10 +161,10 @@ extern void vacuum_delay_point(void);
/* in commands/vacuumlazy.c */ /* in commands/vacuumlazy.c */
extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt, extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy, bool *scanned_all); BufferAccessStrategy bstrategy);
/* in commands/analyze.c */ /* in commands/analyze.c */
extern void analyze_rel(Oid relid, VacuumStmt *vacstmt, extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
BufferAccessStrategy bstrategy, bool update_reltuples); BufferAccessStrategy bstrategy);
#endif /* VACUUM_H */ #endif /* VACUUM_H */
...@@ -322,7 +322,6 @@ typedef struct PgStat_MsgVacuum ...@@ -322,7 +322,6 @@ typedef struct PgStat_MsgVacuum
PgStat_MsgHdr m_hdr; PgStat_MsgHdr m_hdr;
Oid m_databaseid; Oid m_databaseid;
Oid m_tableoid; Oid m_tableoid;
bool m_adopt_counts;
bool m_autovacuum; bool m_autovacuum;
TimestampTz m_vacuumtime; TimestampTz m_vacuumtime;
PgStat_Counter m_tuples; PgStat_Counter m_tuples;
...@@ -339,7 +338,6 @@ typedef struct PgStat_MsgAnalyze ...@@ -339,7 +338,6 @@ typedef struct PgStat_MsgAnalyze
PgStat_MsgHdr m_hdr; PgStat_MsgHdr m_hdr;
Oid m_databaseid; Oid m_databaseid;
Oid m_tableoid; Oid m_tableoid;
bool m_adopt_counts;
bool m_autovacuum; bool m_autovacuum;
TimestampTz m_analyzetime; TimestampTz m_analyzetime;
PgStat_Counter m_live_tuples; PgStat_Counter m_live_tuples;
...@@ -706,9 +704,9 @@ extern void pgstat_reset_shared_counters(const char *); ...@@ -706,9 +704,9 @@ extern void pgstat_reset_shared_counters(const char *);
extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type); extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
extern void pgstat_report_autovac(Oid dboid); extern void pgstat_report_autovac(Oid dboid);
extern void pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts, extern void pgstat_report_vacuum(Oid tableoid, bool shared,
PgStat_Counter tuples); PgStat_Counter tuples);
extern void pgstat_report_analyze(Relation rel, bool adopt_counts, extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples); PgStat_Counter livetuples, PgStat_Counter deadtuples);
extern void pgstat_report_recovery_conflict(int reason); extern void pgstat_report_recovery_conflict(int reason);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment