Commit 77947c51 authored by Tom Lane

Fix up pgstats counting of live and dead tuples to recognize that committed
and aborted transactions have different effects; also teach it not to assume
that prepared transactions are always committed.

Along the way, simplify the pgstats API by tying counting directly to
Relations; I cannot detect any redeeming social value in having stats
pointers in HeapScanDesc and IndexScanDesc structures.  And fix a few
corner cases in which counts might be missed because the relation's
pgstat_info pointer hadn't been set.
parent cadb7833
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.9 2007/01/31 15:09:45 teodor Exp $ * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.10 2007/05/27 03:50:38 tgl Exp $
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -189,7 +189,7 @@ newScanKey(IndexScanDesc scan) ...@@ -189,7 +189,7 @@ newScanKey(IndexScanDesc scan)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("GIN index does not support search with void query"))); errmsg("GIN index does not support search with void query")));
pgstat_count_index_scan(&scan->xs_pgstat_info); pgstat_count_index_scan(scan->indexRelation);
} }
Datum Datum
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.65 2007/04/06 22:33:41 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.66 2007/05/27 03:50:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -165,7 +165,7 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids, ...@@ -165,7 +165,7 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids,
stk->next = NULL; stk->next = NULL;
stk->block = GIST_ROOT_BLKNO; stk->block = GIST_ROOT_BLKNO;
pgstat_count_index_scan(&scan->xs_pgstat_info); pgstat_count_index_scan(scan->indexRelation);
} }
else if (so->curbuf == InvalidBuffer) else if (so->curbuf == InvalidBuffer)
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.49 2007/05/03 16:45:58 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.50 2007/05/27 03:50:38 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -127,7 +127,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) ...@@ -127,7 +127,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
ItemPointer current; ItemPointer current;
OffsetNumber offnum; OffsetNumber offnum;
pgstat_count_index_scan(&scan->xs_pgstat_info); pgstat_count_index_scan(rel);
current = &(so->hashso_curpos); current = &(so->hashso_curpos);
ItemPointerSetInvalid(current); ItemPointerSetInvalid(current);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.232 2007/04/08 01:26:27 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -100,7 +100,7 @@ initscan(HeapScanDesc scan, ScanKey key) ...@@ -100,7 +100,7 @@ initscan(HeapScanDesc scan, ScanKey key)
if (key != NULL) if (key != NULL)
memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData)); memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
pgstat_count_heap_scan(&scan->rs_pgstat_info); pgstat_count_heap_scan(scan->rs_rd);
} }
/* /*
...@@ -701,6 +701,8 @@ relation_open(Oid relationId, LOCKMODE lockmode) ...@@ -701,6 +701,8 @@ relation_open(Oid relationId, LOCKMODE lockmode)
if (!RelationIsValid(r)) if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId); elog(ERROR, "could not open relation with OID %u", relationId);
pgstat_initstats(r);
return r; return r;
} }
...@@ -743,6 +745,8 @@ try_relation_open(Oid relationId, LOCKMODE lockmode) ...@@ -743,6 +745,8 @@ try_relation_open(Oid relationId, LOCKMODE lockmode)
if (!RelationIsValid(r)) if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId); elog(ERROR, "could not open relation with OID %u", relationId);
pgstat_initstats(r);
return r; return r;
} }
...@@ -787,6 +791,8 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode) ...@@ -787,6 +791,8 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode)
if (!RelationIsValid(r)) if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId); elog(ERROR, "could not open relation with OID %u", relationId);
pgstat_initstats(r);
return r; return r;
} }
...@@ -873,8 +879,6 @@ heap_open(Oid relationId, LOCKMODE lockmode) ...@@ -873,8 +879,6 @@ heap_open(Oid relationId, LOCKMODE lockmode)
errmsg("\"%s\" is a composite type", errmsg("\"%s\" is a composite type",
RelationGetRelationName(r)))); RelationGetRelationName(r))));
pgstat_initstats(&r->pgstat_info, r);
return r; return r;
} }
...@@ -903,8 +907,6 @@ heap_openrv(const RangeVar *relation, LOCKMODE lockmode) ...@@ -903,8 +907,6 @@ heap_openrv(const RangeVar *relation, LOCKMODE lockmode)
errmsg("\"%s\" is a composite type", errmsg("\"%s\" is a composite type",
RelationGetRelationName(r)))); RelationGetRelationName(r))));
pgstat_initstats(&r->pgstat_info, r);
return r; return r;
} }
...@@ -954,8 +956,6 @@ heap_beginscan(Relation relation, Snapshot snapshot, ...@@ -954,8 +956,6 @@ heap_beginscan(Relation relation, Snapshot snapshot,
else else
scan->rs_key = NULL; scan->rs_key = NULL;
pgstat_initstats(&scan->rs_pgstat_info, relation);
initscan(scan, key); initscan(scan, key);
return scan; return scan;
...@@ -1059,7 +1059,7 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) ...@@ -1059,7 +1059,7 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
*/ */
HEAPDEBUG_3; /* heap_getnext returning tuple */ HEAPDEBUG_3; /* heap_getnext returning tuple */
pgstat_count_heap_getnext(&scan->rs_pgstat_info); pgstat_count_heap_getnext(scan->rs_rd);
return &(scan->rs_ctup); return &(scan->rs_ctup);
} }
...@@ -1086,6 +1086,10 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction) ...@@ -1086,6 +1086,10 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
* and return it in *userbuf (so the caller must eventually unpin it); when * and return it in *userbuf (so the caller must eventually unpin it); when
* keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer. * keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer.
* *
* stats_relation is the relation to charge the heap_fetch operation against
* for statistical purposes. (This could be the heap rel itself, an
* associated index, or NULL to not count the fetch at all.)
*
* It is somewhat inconsistent that we ereport() on invalid block number but * It is somewhat inconsistent that we ereport() on invalid block number but
* return false on invalid item number. There are a couple of reasons though. * return false on invalid item number. There are a couple of reasons though.
* One is that the caller can relatively easily check the block number for * One is that the caller can relatively easily check the block number for
...@@ -1101,12 +1105,12 @@ heap_fetch(Relation relation, ...@@ -1101,12 +1105,12 @@ heap_fetch(Relation relation,
HeapTuple tuple, HeapTuple tuple,
Buffer *userbuf, Buffer *userbuf,
bool keep_buf, bool keep_buf,
PgStat_Info *pgstat_info) Relation stats_relation)
{ {
/* Assume *userbuf is undefined on entry */ /* Assume *userbuf is undefined on entry */
*userbuf = InvalidBuffer; *userbuf = InvalidBuffer;
return heap_release_fetch(relation, snapshot, tuple, return heap_release_fetch(relation, snapshot, tuple,
userbuf, keep_buf, pgstat_info); userbuf, keep_buf, stats_relation);
} }
/* /*
...@@ -1125,7 +1129,7 @@ heap_release_fetch(Relation relation, ...@@ -1125,7 +1129,7 @@ heap_release_fetch(Relation relation,
HeapTuple tuple, HeapTuple tuple,
Buffer *userbuf, Buffer *userbuf,
bool keep_buf, bool keep_buf,
PgStat_Info *pgstat_info) Relation stats_relation)
{ {
ItemPointer tid = &(tuple->t_self); ItemPointer tid = &(tuple->t_self);
ItemId lp; ItemId lp;
...@@ -1210,9 +1214,9 @@ heap_release_fetch(Relation relation, ...@@ -1210,9 +1214,9 @@ heap_release_fetch(Relation relation,
*/ */
*userbuf = buffer; *userbuf = buffer;
/* Count the successful fetch in *pgstat_info, if given. */ /* Count the successful fetch against appropriate rel, if any */
if (pgstat_info != NULL) if (stats_relation != NULL)
pgstat_count_heap_fetch(pgstat_info); pgstat_count_heap_fetch(stats_relation);
return true; return true;
} }
...@@ -1517,7 +1521,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, ...@@ -1517,7 +1521,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
*/ */
CacheInvalidateHeapTuple(relation, heaptup); CacheInvalidateHeapTuple(relation, heaptup);
pgstat_count_heap_insert(&relation->pgstat_info); pgstat_count_heap_insert(relation);
/* /*
* If heaptup is a private copy, release it. Don't forget to copy t_self * If heaptup is a private copy, release it. Don't forget to copy t_self
...@@ -1807,7 +1811,7 @@ l1: ...@@ -1807,7 +1811,7 @@ l1:
if (have_tuple_lock) if (have_tuple_lock)
UnlockTuple(relation, &(tp.t_self), ExclusiveLock); UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
pgstat_count_heap_delete(&relation->pgstat_info); pgstat_count_heap_delete(relation);
return HeapTupleMayBeUpdated; return HeapTupleMayBeUpdated;
} }
...@@ -2269,7 +2273,7 @@ l2: ...@@ -2269,7 +2273,7 @@ l2:
if (have_tuple_lock) if (have_tuple_lock)
UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock); UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
pgstat_count_heap_update(&relation->pgstat_info); pgstat_count_heap_update(relation);
/* /*
* If heaptup is a private copy, release it. Don't forget to copy t_self * If heaptup is a private copy, release it. Don't forget to copy t_self
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.61 2007/01/20 18:43:35 neilc Exp $ * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.62 2007/05/27 03:50:38 tgl Exp $
* *
* NOTES * NOTES
* many of the old access method routines have been turned into * many of the old access method routines have been turned into
...@@ -96,8 +96,6 @@ RelationGetIndexScan(Relation indexRelation, ...@@ -96,8 +96,6 @@ RelationGetIndexScan(Relation indexRelation,
scan->xs_ctup.t_data = NULL; scan->xs_ctup.t_data = NULL;
scan->xs_cbuf = InvalidBuffer; scan->xs_cbuf = InvalidBuffer;
pgstat_initstats(&scan->xs_pgstat_info, indexRelation);
/* /*
* Let the AM fill in the key and any opaque data it wants. * Let the AM fill in the key and any opaque data it wants.
*/ */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.97 2007/01/05 22:19:23 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.98 2007/05/27 03:50:38 tgl Exp $
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
* index_open - open an index relation by relation OID * index_open - open an index relation by relation OID
...@@ -145,8 +145,6 @@ index_open(Oid relationId, LOCKMODE lockmode) ...@@ -145,8 +145,6 @@ index_open(Oid relationId, LOCKMODE lockmode)
errmsg("\"%s\" is not an index", errmsg("\"%s\" is not an index",
RelationGetRelationName(r)))); RelationGetRelationName(r))));
pgstat_initstats(&r->pgstat_info, r);
return r; return r;
} }
...@@ -433,14 +431,14 @@ index_getnext(IndexScanDesc scan, ScanDirection direction) ...@@ -433,14 +431,14 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
return NULL; /* failure exit */ return NULL; /* failure exit */
} }
pgstat_count_index_tuples(&scan->xs_pgstat_info, 1); pgstat_count_index_tuples(scan->indexRelation, 1);
/* /*
* Fetch the heap tuple and see if it matches the snapshot. * Fetch the heap tuple and see if it matches the snapshot.
*/ */
if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot, if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
heapTuple, &scan->xs_cbuf, true, heapTuple, &scan->xs_cbuf, true,
&scan->xs_pgstat_info)) scan->indexRelation))
break; break;
/* Skip if no undeleted tuple at this location */ /* Skip if no undeleted tuple at this location */
...@@ -502,7 +500,7 @@ index_getnext_indexitem(IndexScanDesc scan, ...@@ -502,7 +500,7 @@ index_getnext_indexitem(IndexScanDesc scan,
Int32GetDatum(direction))); Int32GetDatum(direction)));
if (found) if (found)
pgstat_count_index_tuples(&scan->xs_pgstat_info, 1); pgstat_count_index_tuples(scan->indexRelation, 1);
return found; return found;
} }
...@@ -543,7 +541,7 @@ index_getmulti(IndexScanDesc scan, ...@@ -543,7 +541,7 @@ index_getmulti(IndexScanDesc scan,
Int32GetDatum(max_tids), Int32GetDatum(max_tids),
PointerGetDatum(returned_tids))); PointerGetDatum(returned_tids)));
pgstat_count_index_tuples(&scan->xs_pgstat_info, *returned_tids); pgstat_count_index_tuples(scan->indexRelation, *returned_tids);
return found; return found;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.112 2007/04/06 22:33:42 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.113 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -453,7 +453,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) ...@@ -453,7 +453,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
int i; int i;
StrategyNumber strat_total; StrategyNumber strat_total;
pgstat_count_index_scan(&scan->xs_pgstat_info); pgstat_count_index_scan(rel);
/* /*
* Examine the scan keys and eliminate any redundant keys; also mark the * Examine the scan keys and eliminate any redundant keys; also mark the
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.30 2007/04/30 21:01:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.31 2007/05/27 03:50:39 tgl Exp $
* *
* NOTES * NOTES
* Each global transaction is associated with a global transaction * Each global transaction is associated with a global transaction
...@@ -1211,7 +1211,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) ...@@ -1211,7 +1211,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
else else
ProcessRecords(bufptr, xid, twophase_postabort_callbacks); ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
pgstat_count_xact_commit(); /* Count the prepared xact as committed or aborted */
AtEOXact_PgStat(isCommit);
/* /*
* And now we can clean up our mess. * And now we can clean up our mess.
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.4 2007/01/05 22:19:23 momjian Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.5 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "access/twophase_rmgr.h" #include "access/twophase_rmgr.h"
#include "commands/async.h" #include "commands/async.h"
#include "pgstat.h"
#include "storage/lock.h" #include "storage/lock.h"
#include "utils/flatfiles.h" #include "utils/flatfiles.h"
#include "utils/inval.h" #include "utils/inval.h"
...@@ -27,7 +28,8 @@ const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] = ...@@ -27,7 +28,8 @@ const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] =
lock_twophase_recover, /* Lock */ lock_twophase_recover, /* Lock */
NULL, /* Inval */ NULL, /* Inval */
NULL, /* flat file update */ NULL, /* flat file update */
NULL /* notify/listen */ NULL, /* notify/listen */
NULL /* pgstat */
}; };
const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
...@@ -36,7 +38,8 @@ const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] = ...@@ -36,7 +38,8 @@ const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
lock_twophase_postcommit, /* Lock */ lock_twophase_postcommit, /* Lock */
inval_twophase_postcommit, /* Inval */ inval_twophase_postcommit, /* Inval */
flatfile_twophase_postcommit, /* flat file update */ flatfile_twophase_postcommit, /* flat file update */
notify_twophase_postcommit /* notify/listen */ notify_twophase_postcommit, /* notify/listen */
pgstat_twophase_postcommit /* pgstat */
}; };
const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
...@@ -45,5 +48,6 @@ const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] = ...@@ -45,5 +48,6 @@ const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
lock_twophase_postabort, /* Lock */ lock_twophase_postabort, /* Lock */
NULL, /* Inval */ NULL, /* Inval */
NULL, /* flat file update */ NULL, /* flat file update */
NULL /* notify/listen */ NULL, /* notify/listen */
pgstat_twophase_postabort /* pgstat */
}; };
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.242 2007/04/30 21:01:52 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.243 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1661,8 +1661,7 @@ CommitTransaction(void) ...@@ -1661,8 +1661,7 @@ CommitTransaction(void)
AtEOXact_Files(); AtEOXact_Files();
AtEOXact_ComboCid(); AtEOXact_ComboCid();
AtEOXact_HashTables(true); AtEOXact_HashTables(true);
pgstat_clear_snapshot(); AtEOXact_PgStat(true);
pgstat_count_xact_commit();
pgstat_report_txn_timestamp(0); pgstat_report_txn_timestamp(0);
CurrentResourceOwner = NULL; CurrentResourceOwner = NULL;
...@@ -1796,6 +1795,7 @@ PrepareTransaction(void) ...@@ -1796,6 +1795,7 @@ PrepareTransaction(void)
AtPrepare_UpdateFlatFiles(); AtPrepare_UpdateFlatFiles();
AtPrepare_Inval(); AtPrepare_Inval();
AtPrepare_Locks(); AtPrepare_Locks();
AtPrepare_PgStat();
/* /*
* Here is where we really truly prepare. * Here is where we really truly prepare.
...@@ -1853,6 +1853,8 @@ PrepareTransaction(void) ...@@ -1853,6 +1853,8 @@ PrepareTransaction(void)
/* notify and flatfiles don't need a postprepare call */ /* notify and flatfiles don't need a postprepare call */
PostPrepare_PgStat();
PostPrepare_Inval(); PostPrepare_Inval();
PostPrepare_smgr(); PostPrepare_smgr();
...@@ -1880,7 +1882,7 @@ PrepareTransaction(void) ...@@ -1880,7 +1882,7 @@ PrepareTransaction(void)
AtEOXact_Files(); AtEOXact_Files();
AtEOXact_ComboCid(); AtEOXact_ComboCid();
AtEOXact_HashTables(true); AtEOXact_HashTables(true);
pgstat_clear_snapshot(); /* don't call AtEOXact_PgStat here */
CurrentResourceOwner = NULL; CurrentResourceOwner = NULL;
ResourceOwnerDelete(TopTransactionResourceOwner); ResourceOwnerDelete(TopTransactionResourceOwner);
...@@ -2035,8 +2037,7 @@ AbortTransaction(void) ...@@ -2035,8 +2037,7 @@ AbortTransaction(void)
AtEOXact_Files(); AtEOXact_Files();
AtEOXact_ComboCid(); AtEOXact_ComboCid();
AtEOXact_HashTables(false); AtEOXact_HashTables(false);
pgstat_clear_snapshot(); AtEOXact_PgStat(false);
pgstat_count_xact_rollback();
pgstat_report_txn_timestamp(0); pgstat_report_txn_timestamp(0);
/* /*
...@@ -3749,6 +3750,7 @@ CommitSubTransaction(void) ...@@ -3749,6 +3750,7 @@ CommitSubTransaction(void)
AtEOSubXact_Files(true, s->subTransactionId, AtEOSubXact_Files(true, s->subTransactionId,
s->parent->subTransactionId); s->parent->subTransactionId);
AtEOSubXact_HashTables(true, s->nestingLevel); AtEOSubXact_HashTables(true, s->nestingLevel);
AtEOSubXact_PgStat(true, s->nestingLevel);
/* /*
* We need to restore the upper transaction's read-only state, in case the * We need to restore the upper transaction's read-only state, in case the
...@@ -3861,6 +3863,7 @@ AbortSubTransaction(void) ...@@ -3861,6 +3863,7 @@ AbortSubTransaction(void)
AtEOSubXact_Files(false, s->subTransactionId, AtEOSubXact_Files(false, s->subTransactionId,
s->parent->subTransactionId); s->parent->subTransactionId);
AtEOSubXact_HashTables(false, s->nestingLevel); AtEOSubXact_HashTables(false, s->nestingLevel);
AtEOSubXact_PgStat(false, s->nestingLevel);
} }
/* /*
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.16 2007/01/05 22:19:28 momjian Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.17 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -189,7 +189,7 @@ BitmapHeapNext(BitmapHeapScanState *node) ...@@ -189,7 +189,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
scan->rs_ctup.t_len = ItemIdGetLength(lp); scan->rs_ctup.t_len = ItemIdGetLength(lp);
ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
pgstat_count_heap_fetch(&scan->rs_pgstat_info); pgstat_count_heap_fetch(scan->rs_rd);
/* /*
* Set up the result slot to point to this tuple. Note that the slot * Set up the result slot to point to this tuple. Note that the slot
...@@ -389,7 +389,7 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt) ...@@ -389,7 +389,7 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
heap_rescan(node->ss.ss_currentScanDesc, NULL); heap_rescan(node->ss.ss_currentScanDesc, NULL);
/* undo bogus "seq scan" count (see notes in ExecInitBitmapHeapScan) */ /* undo bogus "seq scan" count (see notes in ExecInitBitmapHeapScan) */
pgstat_discount_heap_scan(&node->ss.ss_currentScanDesc->rs_pgstat_info); pgstat_discount_heap_scan(node->ss.ss_currentScanDesc->rs_rd);
if (node->tbm) if (node->tbm)
tbm_free(node->tbm); tbm_free(node->tbm);
...@@ -535,7 +535,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags) ...@@ -535,7 +535,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
* when we actually aren't doing any such thing. Reverse out the added * when we actually aren't doing any such thing. Reverse out the added
* scan count. (Eventually we may want to count bitmap scans separately.) * scan count. (Eventually we may want to count bitmap scans separately.)
*/ */
pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info); pgstat_discount_heap_scan(scanstate->ss.ss_currentScanDesc->rs_rd);
/* /*
* get the scan type from the relation descriptor. * get the scan type from the relation descriptor.
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.37 2007/03/30 18:34:55 mha Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.38 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -125,13 +125,6 @@ typedef struct ...@@ -125,13 +125,6 @@ typedef struct
static BgWriterShmemStruct *BgWriterShmem; static BgWriterShmemStruct *BgWriterShmem;
/*
* BgWriter statistics counters.
* Stored directly in a stats message structure so it can be sent
* without needing to copy things around.
*/
PgStat_MsgBgWriter BgWriterStats;
/* /*
* GUC parameters * GUC parameters
*/ */
...@@ -250,11 +243,6 @@ BackgroundWriterMain(void) ...@@ -250,11 +243,6 @@ BackgroundWriterMain(void)
ALLOCSET_DEFAULT_MAXSIZE); ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(bgwriter_context); MemoryContextSwitchTo(bgwriter_context);
/*
* Initialize statistics counters to zero
*/
memset(&BgWriterStats, 0, sizeof(BgWriterStats));
/* /*
* If an exception is encountered, processing resumes here. * If an exception is encountered, processing resumes here.
* *
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* Copyright (c) 2001-2007, PostgreSQL Global Development Group * Copyright (c) 2001-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.155 2007/04/30 16:37:08 tgl Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.156 2007/05/27 03:50:39 tgl Exp $
* ---------- * ----------
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "access/heapam.h" #include "access/heapam.h"
#include "access/transam.h" #include "access/transam.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h" #include "access/xact.h"
#include "catalog/pg_database.h" #include "catalog/pg_database.h"
#include "libpq/ip.h" #include "libpq/ip.h"
...@@ -98,6 +99,13 @@ bool pgstat_collect_tuplelevel = false; ...@@ -98,6 +99,13 @@ bool pgstat_collect_tuplelevel = false;
bool pgstat_collect_blocklevel = false; bool pgstat_collect_blocklevel = false;
bool pgstat_collect_querystring = false; bool pgstat_collect_querystring = false;
/*
* BgWriter global statistics counters (unused in other processes).
* Stored directly in a stats message structure so it can be sent
* without needing to copy things around. We assume this inits to zeroes.
*/
PgStat_MsgBgWriter BgWriterStats;
/* ---------- /* ----------
* Local data * Local data
* ---------- * ----------
...@@ -111,43 +119,63 @@ static time_t last_pgstat_start_time; ...@@ -111,43 +119,63 @@ static time_t last_pgstat_start_time;
static bool pgStatRunningInCollector = false; static bool pgStatRunningInCollector = false;
/* /*
* Place where backends store per-table info to be sent to the collector. * Structures in which backends store per-table info that's waiting to be
* We store shared relations separately from non-shared ones, to be able to * sent to the collector.
* send them in separate messages.
* *
* NOTE: once allocated, a PgStat_MsgTabstat struct belonging to a * NOTE: once allocated, TabStatusArray structures are never moved or deleted
* TabStatArray is never moved or deleted for the life of the backend. * for the life of the backend. Also, we zero out the t_id fields of the
* Also, we zero out the t_id fields of the contained PgStat_TableEntry * contained PgStat_TableStatus structs whenever they are not actively in use.
* structs whenever they are not actively in use. This allows PgStat_Info * This allows relcache pgstat_info pointers to be treated as long-lived data,
* pointers to be treated as long-lived data, avoiding repeated searches in * avoiding repeated searches in pgstat_initstats() when a relation is
* pgstat_initstats() when a relation is repeatedly heap_open'd or * repeatedly opened during a transaction.
* index_open'd during a transaction.
*/ */
typedef struct TabStatArray #define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
typedef struct TabStatusArray
{ {
int tsa_alloc; /* num allocated */ struct TabStatusArray *tsa_next; /* link to next array, if any */
int tsa_used; /* num actually used */ int tsa_used; /* # entries currently used */
PgStat_MsgTabstat **tsa_messages; /* the array itself */ PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */
} TabStatArray; } TabStatusArray;
#define TABSTAT_QUANTUM 4 /* we alloc this many at a time */ static TabStatusArray *pgStatTabList = NULL;
/*
* Tuple insertion/deletion counts for an open transaction can't be propagated
* into PgStat_TableStatus counters until we know if it is going to commit
* or abort. Hence, we keep these counts in per-subxact structs that live
* in TopTransactionContext. This data structure is designed on the assumption
* that subxacts won't usually modify very many tables.
*/
typedef struct PgStat_SubXactStatus
{
int nest_level; /* subtransaction nest level */
struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
PgStat_TableXactStatus *first; /* head of list for this subxact */
} PgStat_SubXactStatus;
static TabStatArray RegularTabStat = {0, 0, NULL}; static PgStat_SubXactStatus *pgStatXactStack = NULL;
static TabStatArray SharedTabStat = {0, 0, NULL};
static int pgStatXactCommit = 0; static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0; static int pgStatXactRollback = 0;
/* Record that's written to 2PC state file when pgstat state is persisted */
typedef struct TwoPhasePgStatRecord
{
PgStat_Counter tuples_inserted; /* tuples inserted in xact */
PgStat_Counter tuples_deleted; /* tuples deleted in xact */
Oid t_id; /* table's OID */
bool t_shared; /* is it a shared catalog? */
} TwoPhasePgStatRecord;
/*
* Info about current "snapshot" of stats file
*/
static MemoryContext pgStatLocalContext = NULL; static MemoryContext pgStatLocalContext = NULL;
static HTAB *pgStatDBHash = NULL; static HTAB *pgStatDBHash = NULL;
static PgBackendStatus *localBackendStatusTable = NULL; static PgBackendStatus *localBackendStatusTable = NULL;
static int localNumBackends = 0; static int localNumBackends = 0;
/*
* BgWriter global statistics counters, from bgwriter.c
*/
extern PgStat_MsgBgWriter BgWriterStats;
/* /*
* Cluster wide statistics, kept in the stats collector. * Cluster wide statistics, kept in the stats collector.
* Contains statistics that are not collected per database * Contains statistics that are not collected per database
...@@ -177,9 +205,12 @@ static void pgstat_write_statsfile(void); ...@@ -177,9 +205,12 @@ static void pgstat_write_statsfile(void);
static HTAB *pgstat_read_statsfile(Oid onlydb); static HTAB *pgstat_read_statsfile(Oid onlydb);
static void backend_read_statsfile(void); static void backend_read_statsfile(void);
static void pgstat_read_current_status(void); static void pgstat_read_current_status(void);
static void pgstat_report_one_tabstat(TabStatArray *tsarr, Oid dbid);
static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static HTAB *pgstat_collect_oids(Oid catalogid); static HTAB *pgstat_collect_oids(Oid catalogid);
static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
static void pgstat_setup_memcxt(void); static void pgstat_setup_memcxt(void);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype); static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
...@@ -617,12 +648,19 @@ void allow_immediate_pgstat_restart(void) ...@@ -617,12 +648,19 @@ void allow_immediate_pgstat_restart(void)
void void
pgstat_report_tabstat(bool force) pgstat_report_tabstat(bool force)
{ {
/* we assume this inits to all zeroes: */
static const PgStat_TableCounts all_zeroes;
static TimestampTz last_report = 0; static TimestampTz last_report = 0;
TimestampTz now; TimestampTz now;
PgStat_MsgTabstat regular_msg;
PgStat_MsgTabstat shared_msg;
TabStatusArray *tsa;
int i;
/* Don't expend a clock check if nothing to do */ /* Don't expend a clock check if nothing to do */
if (RegularTabStat.tsa_used == 0 && if (pgStatTabList == NULL ||
SharedTabStat.tsa_used == 0) pgStatTabList->tsa_used == 0)
return; return;
/* /*
...@@ -636,51 +674,101 @@ pgstat_report_tabstat(bool force) ...@@ -636,51 +674,101 @@ pgstat_report_tabstat(bool force)
last_report = now; last_report = now;
/* /*
* For each message buffer used during the last queries, set the header * Scan through the TabStatusArray struct(s) to find tables that actually
* fields and send it out; then mark the entries unused. * have counts, and build messages to send. We have to separate shared
* relations from regular ones because the databaseid field in the
* message header has to depend on that.
*/
regular_msg.m_databaseid = MyDatabaseId;
shared_msg.m_databaseid = InvalidOid;
regular_msg.m_nentries = 0;
shared_msg.m_nentries = 0;
for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
{
for (i = 0; i < tsa->tsa_used; i++)
{
PgStat_TableStatus *entry = &tsa->tsa_entries[i];
PgStat_MsgTabstat *this_msg;
PgStat_TableEntry *this_ent;
/* Shouldn't have any pending transaction-dependent counts */
Assert(entry->trans == NULL);
/*
* Ignore entries that didn't accumulate any actual counts,
* such as indexes that were opened by the planner but not used.
*/
if (memcmp(&entry->t_counts, &all_zeroes,
sizeof(PgStat_TableCounts)) == 0)
continue;
/*
* OK, insert data into the appropriate message, and send if full.
*/ */
pgstat_report_one_tabstat(&RegularTabStat, MyDatabaseId); this_msg = entry->t_shared ? &shared_msg : &regular_msg;
pgstat_report_one_tabstat(&SharedTabStat, InvalidOid); this_ent = &this_msg->m_entry[this_msg->m_nentries];
this_ent->t_id = entry->t_id;
memcpy(&this_ent->t_counts, &entry->t_counts,
sizeof(PgStat_TableCounts));
if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
{
pgstat_send_tabstat(this_msg);
this_msg->m_nentries = 0;
}
}
/* zero out TableStatus structs after use */
MemSet(tsa->tsa_entries, 0,
tsa->tsa_used * sizeof(PgStat_TableStatus));
tsa->tsa_used = 0;
}
/*
* Send partial messages. If force is true, make sure that any pending
* xact commit/abort gets counted, even if no table stats to send.
*/
if (regular_msg.m_nentries > 0 ||
(force && (pgStatXactCommit > 0 || pgStatXactRollback > 0)))
pgstat_send_tabstat(&regular_msg);
if (shared_msg.m_nentries > 0)
pgstat_send_tabstat(&shared_msg);
} }
/*
* Subroutine for pgstat_report_tabstat: finish and send a tabstat message
*/
static void static void
pgstat_report_one_tabstat(TabStatArray *tsarr, Oid dbid) pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
{ {
int i;
for (i = 0; i < tsarr->tsa_used; i++)
{
PgStat_MsgTabstat *tsmsg = tsarr->tsa_messages[i];
int n; int n;
int len; int len;
n = tsmsg->m_nentries; /* It's unlikely we'd get here with no socket, but maybe not impossible */
len = offsetof(PgStat_MsgTabstat, m_entry[0]) + if (pgStatSock < 0)
n * sizeof(PgStat_TableEntry); return;
/*
* Report accumulated xact commit/rollback whenever we send a normal
* tabstat message
*/
if (OidIsValid(tsmsg->m_databaseid))
{
tsmsg->m_xact_commit = pgStatXactCommit; tsmsg->m_xact_commit = pgStatXactCommit;
tsmsg->m_xact_rollback = pgStatXactRollback; tsmsg->m_xact_rollback = pgStatXactRollback;
pgStatXactCommit = 0; pgStatXactCommit = 0;
pgStatXactRollback = 0; pgStatXactRollback = 0;
}
/* else
* It's unlikely we'd get here with no socket, but maybe not
* impossible
*/
if (pgStatSock >= 0)
{ {
pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT); tsmsg->m_xact_commit = 0;
tsmsg->m_databaseid = dbid; tsmsg->m_xact_rollback = 0;
pgstat_send(tsmsg, len);
} }
/* n = tsmsg->m_nentries;
* Zero out the entries, to mark them unused and prepare them len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
* for next use. n * sizeof(PgStat_TableEntry);
*/
MemSet(tsmsg, 0, len); pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
} pgstat_send(tsmsg, len);
tsarr->tsa_used = 0;
} }
...@@ -1016,207 +1104,487 @@ pgstat_ping(void) ...@@ -1016,207 +1104,487 @@ pgstat_ping(void)
pgstat_send(&msg, sizeof(msg)); pgstat_send(&msg, sizeof(msg));
} }
/*
* Enlarge a TabStatArray
*/
static void
more_tabstat_space(TabStatArray *tsarr)
{
PgStat_MsgTabstat *newMessages;
PgStat_MsgTabstat **msgArray;
int newAlloc;
int i;
AssertArg(PointerIsValid(tsarr));
newAlloc = tsarr->tsa_alloc + TABSTAT_QUANTUM;
/* Create (another) quantum of message buffers, and zero them */
newMessages = (PgStat_MsgTabstat *)
MemoryContextAllocZero(TopMemoryContext,
sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
/* Create or enlarge the pointer array */
if (tsarr->tsa_messages == NULL)
msgArray = (PgStat_MsgTabstat **)
MemoryContextAlloc(TopMemoryContext,
sizeof(PgStat_MsgTabstat *) * newAlloc);
else
msgArray = (PgStat_MsgTabstat **)
repalloc(tsarr->tsa_messages,
sizeof(PgStat_MsgTabstat *) * newAlloc);
for (i = 0; i < TABSTAT_QUANTUM; i++)
msgArray[tsarr->tsa_alloc + i] = newMessages++;
tsarr->tsa_messages = msgArray;
tsarr->tsa_alloc = newAlloc;
Assert(tsarr->tsa_used < tsarr->tsa_alloc);
}
/* ---------- /* ----------
* pgstat_initstats() - * pgstat_initstats() -
* *
* Called from various places usually dealing with initialization * Initialize a relcache entry to count access statistics.
* of Relation or Scan structures. The data placed into these * Called whenever a relation is opened.
* structures from here tell where later to count for buffer reads,
* scans and tuples fetched.
*
* NOTE: PgStat_Info pointers in scan structures are really redundant
* with those in relcache entries. The passed stats pointer might point
* either to the Relation struct's own pgstat_info field, or to one in
* a scan structure; we'll set the Relation pg_statinfo and copy it to
* the scan struct.
* *
* We assume that a relcache entry's pgstat_info field is zeroed by * We assume that a relcache entry's pgstat_info field is zeroed by
* relcache.c when the relcache entry is made; thereafter it is long-lived * relcache.c when the relcache entry is made; thereafter it is long-lived
* data. We can avoid repeated searches of the TabStat arrays when the * data. We can avoid repeated searches of the TabStatus arrays when the
* same relation is touched repeatedly within a transaction. * same relation is touched repeatedly within a transaction.
* ---------- * ----------
*/ */
void void
pgstat_initstats(PgStat_Info *stats, Relation rel) pgstat_initstats(Relation rel)
{ {
Oid rel_id = rel->rd_id; Oid rel_id = rel->rd_id;
PgStat_TableEntry *useent; char relkind = rel->rd_rel->relkind;
TabStatArray *tsarr;
PgStat_MsgTabstat *tsmsg; /* We only count stats for things that have storage */
int mb; if (!(relkind == RELKIND_RELATION ||
int i; relkind == RELKIND_INDEX ||
relkind == RELKIND_TOASTVALUE))
{
rel->pgstat_info = NULL;
return;
}
if (pgStatSock < 0 || if (pgStatSock < 0 ||
!(pgstat_collect_tuplelevel || !(pgstat_collect_tuplelevel ||
pgstat_collect_blocklevel)) pgstat_collect_blocklevel))
{ {
/* We're not counting at all. */ /* We're not counting at all */
stats->tabentry = NULL; rel->pgstat_info = NULL;
return; return;
} }
/* /*
* If we already set up this relation in the current transaction, * If we already set up this relation in the current transaction,
* just copy the pointer. * nothing to do.
*/ */
if (rel->pgstat_info.tabentry != NULL && if (rel->pgstat_info != NULL &&
((PgStat_TableEntry *) rel->pgstat_info.tabentry)->t_id == rel_id) rel->pgstat_info->t_id == rel_id)
{
stats->tabentry = rel->pgstat_info.tabentry;
return; return;
}
/* /* Else find or make the PgStat_TableStatus entry, and update link */
* Search the already-used message slots for this relation. rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
}
/*
* get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
*/ */
tsarr = rel->rd_rel->relisshared ? &SharedTabStat : &RegularTabStat; static PgStat_TableStatus *
get_tabstat_entry(Oid rel_id, bool isshared)
{
PgStat_TableStatus *entry;
TabStatusArray *tsa;
TabStatusArray *prev_tsa;
int i;
for (mb = 0; mb < tsarr->tsa_used; mb++) /*
* Search the already-used tabstat slots for this relation.
*/
prev_tsa = NULL;
for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
{ {
tsmsg = tsarr->tsa_messages[mb]; for (i = 0; i < tsa->tsa_used; i++)
for (i = tsmsg->m_nentries; --i >= 0;)
{ {
if (tsmsg->m_entry[i].t_id == rel_id) entry = &tsa->tsa_entries[i];
if (entry->t_id == rel_id)
return entry;
}
if (tsa->tsa_used < TABSTAT_QUANTUM)
{ {
rel->pgstat_info.tabentry = (void *) &(tsmsg->m_entry[i]); /*
stats->tabentry = rel->pgstat_info.tabentry; * It must not be present, but we found a free slot instead.
return; * Fine, let's use this one. We assume the entry was already
* zeroed, either at creation or after last use.
*/
entry = &tsa->tsa_entries[tsa->tsa_used++];
entry->t_id = rel_id;
entry->t_shared = isshared;
return entry;
} }
} }
if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES) /*
continue; * We ran out of tabstat slots, so allocate more. Be sure they're zeroed.
*/
tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
sizeof(TabStatusArray));
if (prev_tsa)
prev_tsa->tsa_next = tsa;
else
pgStatTabList = tsa;
/* /*
* Not found, but found a message buffer with an empty slot instead. * Use the first entry of the new TabStatusArray.
* Fine, let's use this one. We assume the entry was already zeroed,
* either at creation or after last use.
*/ */
i = tsmsg->m_nentries++; entry = &tsa->tsa_entries[tsa->tsa_used++];
useent = &tsmsg->m_entry[i]; entry->t_id = rel_id;
useent->t_id = rel_id; entry->t_shared = isshared;
rel->pgstat_info.tabentry = (void *) useent; return entry;
stats->tabentry = rel->pgstat_info.tabentry; }
return;
/*
 * get_tabstat_stack_level - fetch the (sub)transaction stack entry for
 * nest_level, pushing a new one if the stack top isn't it
 *
 * Only the top of pgStatXactStack is inspected.  If the stack is empty or
 * its top entry belongs to a different nesting level, a fresh entry with an
 * empty per-table list is allocated in TopTransactionContext, linked in
 * above the old top, and returned.
 */
static PgStat_SubXactStatus *
get_tabstat_stack_level(int nest_level)
{
    PgStat_SubXactStatus *top = pgStatXactStack;

    /* Stack top already describes this nesting level: just hand it back */
    if (top != NULL && top->nest_level == nest_level)
        return top;

    /* Otherwise push a new entry on top of whatever is there now */
    top = (PgStat_SubXactStatus *)
        MemoryContextAlloc(TopTransactionContext,
                           sizeof(PgStat_SubXactStatus));
    top->nest_level = nest_level;
    top->prev = pgStatXactStack;
    top->first = NULL;
    pgStatXactStack = top;

    return top;
}
/* /*
* If we ran out of message buffers, we just allocate more. * add_tabstat_xact_level - add a new (sub)transaction state record
*/ */
if (tsarr->tsa_used >= tsarr->tsa_alloc) static void
more_tabstat_space(tsarr); add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
{
PgStat_SubXactStatus *xact_state;
PgStat_TableXactStatus *trans;
/* /*
* Use the first entry of the next message buffer. * If this is the first rel to be modified at the current nest level,
* we first have to push a transaction stack entry.
*/
xact_state = get_tabstat_stack_level(nest_level);
/* Now make a per-table stack entry */
trans = (PgStat_TableXactStatus *)
MemoryContextAllocZero(TopTransactionContext,
sizeof(PgStat_TableXactStatus));
trans->nest_level = nest_level;
trans->upper = pgstat_info->trans;
trans->parent = pgstat_info;
trans->next = xact_state->first;
xact_state->first = trans;
pgstat_info->trans = trans;
}
/*
 * pgstat_count_heap_insert - record the insertion of one tuple into rel
 *
 * A no-op unless tuple-level collection is enabled and the relation has a
 * tabstat entry attached (rel->pgstat_info).
 */
void
pgstat_count_heap_insert(Relation rel)
{
    PgStat_TableStatus *tabstat = rel->pgstat_info;
    int         cur_level;

    if (!pgstat_collect_tuplelevel || tabstat == NULL)
        return;

    cur_level = GetCurrentTransactionNestLevel();

    /* The action counter is nontransactional: bump it unconditionally */
    tabstat->t_counts.t_tuples_inserted++;

    /*
     * The transactional effect must be logged against the current nesting
     * level, so make sure the table's topmost xact record is for that level
     * before counting.
     */
    if (tabstat->trans == NULL || tabstat->trans->nest_level != cur_level)
        add_tabstat_xact_level(tabstat, cur_level);

    tabstat->trans->tuples_inserted++;
}
/*
 * pgstat_count_heap_update - record the update of one tuple in rel
 *
 * A no-op unless tuple-level collection is enabled and the relation has a
 * tabstat entry attached (rel->pgstat_info).
 */
void
pgstat_count_heap_update(Relation rel)
{
    PgStat_TableStatus *tabstat = rel->pgstat_info;
    int         cur_level;

    if (!pgstat_collect_tuplelevel || tabstat == NULL)
        return;

    cur_level = GetCurrentTransactionNestLevel();

    /* The action counter is nontransactional: bump it unconditionally */
    tabstat->t_counts.t_tuples_updated++;

    /*
     * The transactional effect must be logged against the current nesting
     * level, so make sure the table's topmost xact record is for that level
     * before counting.
     */
    if (tabstat->trans == NULL || tabstat->trans->nest_level != cur_level)
        add_tabstat_xact_level(tabstat, cur_level);

    /* Transactionally, an UPDATE is one new tuple plus one deleted tuple */
    tabstat->trans->tuples_inserted++;
    tabstat->trans->tuples_deleted++;
}
/*
* pgstat_count_heap_delete - count a tuple deletion
*/ */
mb = tsarr->tsa_used++; void
tsmsg = tsarr->tsa_messages[mb]; pgstat_count_heap_delete(Relation rel)
tsmsg->m_nentries = 1; {
useent = &tsmsg->m_entry[0]; PgStat_TableStatus *pgstat_info = rel->pgstat_info;
useent->t_id = rel_id;
rel->pgstat_info.tabentry = (void *) useent; if (pgstat_collect_tuplelevel && pgstat_info != NULL)
stats->tabentry = rel->pgstat_info.tabentry; {
int nest_level = GetCurrentTransactionNestLevel();
/* t_tuples_deleted is nontransactional, so just advance it */
pgstat_info->t_counts.t_tuples_deleted++;
/* We have to log the transactional effect at the proper level */
if (pgstat_info->trans == NULL ||
pgstat_info->trans->nest_level != nest_level)
add_tabstat_xact_level(pgstat_info, nest_level);
pgstat_info->trans->tuples_deleted++;
}
} }
/* ---------- /* ----------
* pgstat_count_xact_commit() - * AtEOXact_PgStat
* *
* Called from access/transam/xact.c to count transaction commits. * Called from access/transam/xact.c at top-level transaction commit/abort.
* ---------- * ----------
*/ */
void void
pgstat_count_xact_commit(void) AtEOXact_PgStat(bool isCommit)
{ {
if (!pgstat_collect_tuplelevel && PgStat_SubXactStatus *xact_state;
!pgstat_collect_blocklevel)
return;
/*
* Count transaction commit or abort. (We use counters, not just bools,
* in case the reporting message isn't sent right away.)
*/
if (isCommit)
pgStatXactCommit++; pgStatXactCommit++;
else
pgStatXactRollback++;
/* /*
* If there was no relation activity yet, just make one existing message * Transfer transactional insert/update counts into the base tabstat
* buffer used without slots, causing the next report to tell new * entries. We don't bother to free any of the transactional state,
* xact-counters. * since it's all in TopTransactionContext and will go away anyway.
*/ */
if (RegularTabStat.tsa_alloc == 0) xact_state = pgStatXactStack;
more_tabstat_space(&RegularTabStat); if (xact_state != NULL)
{
PgStat_TableXactStatus *trans;
if (RegularTabStat.tsa_used == 0) Assert(xact_state->nest_level == 1);
Assert(xact_state->prev == NULL);
for (trans = xact_state->first; trans != NULL; trans = trans->next)
{ {
RegularTabStat.tsa_used++; PgStat_TableStatus *tabstat;
RegularTabStat.tsa_messages[0]->m_nentries = 0;
Assert(trans->nest_level == 1);
Assert(trans->upper == NULL);
tabstat = trans->parent;
Assert(tabstat->trans == trans);
if (isCommit)
{
tabstat->t_counts.t_new_live_tuples += trans->tuples_inserted;
tabstat->t_counts.t_new_dead_tuples += trans->tuples_deleted;
} }
} else
{
/* inserted tuples are dead, deleted tuples are unaffected */
tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted;
}
tabstat->trans = NULL;
}
}
pgStatXactStack = NULL;
/* Make sure any stats snapshot is thrown away */
pgstat_clear_snapshot();
}
/* ---------- /* ----------
* pgstat_count_xact_rollback() - * AtEOSubXact_PgStat
* *
* Called from access/transam/xact.c to count transaction rollbacks. * Called from access/transam/xact.c at subtransaction commit/abort.
* ---------- * ----------
*/ */
void void
pgstat_count_xact_rollback(void) AtEOSubXact_PgStat(bool isCommit, int nestDepth)
{ {
if (!pgstat_collect_tuplelevel && PgStat_SubXactStatus *xact_state;
!pgstat_collect_blocklevel)
return;
pgStatXactRollback++; /*
* Transfer transactional insert/update counts into the next higher
* subtransaction state.
*/
xact_state = pgStatXactStack;
if (xact_state != NULL &&
xact_state->nest_level >= nestDepth)
{
PgStat_TableXactStatus *trans;
PgStat_TableXactStatus *next_trans;
/* delink xact_state from stack immediately to simplify reuse case */
pgStatXactStack = xact_state->prev;
for (trans = xact_state->first; trans != NULL; trans = next_trans)
{
PgStat_TableStatus *tabstat;
next_trans = trans->next;
Assert(trans->nest_level == nestDepth);
tabstat = trans->parent;
Assert(tabstat->trans == trans);
if (isCommit)
{
if (trans->upper && trans->upper->nest_level == nestDepth - 1)
{
trans->upper->tuples_inserted += trans->tuples_inserted;
trans->upper->tuples_deleted += trans->tuples_deleted;
tabstat->trans = trans->upper;
pfree(trans);
}
else
{
/* /*
* If there was no relation activity yet, just make one existing message * When there isn't an immediate parent state, we can
* buffer used without slots, causing the next report to tell new * just reuse the record instead of going through a
* xact-counters. * palloc/pfree pushup (this works since it's all in
* TopTransactionContext anyway). We have to re-link
* it into the parent level, though, and that might mean
* pushing a new entry into the pgStatXactStack.
*/ */
if (RegularTabStat.tsa_alloc == 0) PgStat_SubXactStatus *upper_xact_state;
more_tabstat_space(&RegularTabStat);
if (RegularTabStat.tsa_used == 0) upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
trans->next = upper_xact_state->first;
upper_xact_state->first = trans;
trans->nest_level = nestDepth - 1;
}
}
else
{ {
RegularTabStat.tsa_used++; /*
RegularTabStat.tsa_messages[0]->m_nentries = 0; * On abort, inserted tuples are dead (and can be bounced out
* to the top-level tabstat), deleted tuples are unaffected
*/
tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted;
tabstat->trans = trans->upper;
pfree(trans);
} }
}
pfree(xact_state);
}
}
/*
 * AtPrepare_PgStat
 *      Save the transactional stats state at 2PC transaction prepare.
 *
 * Nothing is applied to the tabstat entries here.  For every table that has
 * pending transaction-dependent counts we just register one
 * TwoPhasePgStatRecord (table id, shared flag, inserted/deleted counts)
 * under TWOPHASE_RM_PGSTAT_ID.
 */
void
AtPrepare_PgStat(void)
{
    PgStat_SubXactStatus *top_state = pgStatXactStack;
    PgStat_TableXactStatus *tabxact;

    /* No pending transactional counts, so no records to generate */
    if (top_state == NULL)
        return;

    /* Only a single, outermost (nest level 1) stack entry is expected here */
    Assert(top_state->nest_level == 1);
    Assert(top_state->prev == NULL);

    tabxact = top_state->first;
    while (tabxact != NULL)
    {
        PgStat_TableStatus *owner = tabxact->parent;
        TwoPhasePgStatRecord rec;

        Assert(tabxact->nest_level == 1);
        Assert(tabxact->upper == NULL);
        Assert(owner->trans == tabxact);

        /* Identify the table, then copy its pending counts */
        rec.t_id = owner->t_id;
        rec.t_shared = owner->t_shared;
        rec.tuples_inserted = tabxact->tuples_inserted;
        rec.tuples_deleted = tabxact->tuples_deleted;

        RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
                               &rec, sizeof(TwoPhasePgStatRecord));

        tabxact = tabxact->next;
    }
}
/*
 * PostPrepare_PgStat
 *      Clean up after successful PREPARE.
 *
 * The only work needed is to detach the per-transaction stats records from
 * the nontransactional tabstat entries.  The nontransactional action counts
 * will be reported to the stats collector immediately, while the effects on
 * live and dead tuple counts are preserved in the 2PC state file.
 *
 * Note: AtEOXact_PgStat is not called during PREPARE.
 */
void
PostPrepare_PgStat(void)
{
    PgStat_TableXactStatus *tabxact;

    /*
     * Nothing is pfree'd here: the transactional state all lives in
     * TopTransactionContext and will go away with it.
     */
    if (pgStatXactStack != NULL)
    {
        for (tabxact = pgStatXactStack->first;
             tabxact != NULL;
             tabxact = tabxact->next)
            tabxact->parent->trans = NULL;
    }
    pgStatXactStack = NULL;

    /* Make sure any stats snapshot is thrown away */
    pgstat_clear_snapshot();
}
/*
 * 2PC processing routine for COMMIT PREPARED case.
 *
 * Load the counts saved by AtPrepare_PgStat into our local pgstats state,
 * treating them as committed.
 *
 * recdata points at a TwoPhasePgStatRecord; xid, info and len are not used.
 *
 * A committed deletion turns a live tuple into a dead one, so it has to be
 * subtracted from the live-tuple delta as well as added to the dead-tuple
 * delta.  This matters for updates too: pgstat_count_heap_update records an
 * UPDATE as one insertion plus one deletion, so without the subtraction
 * every committed update would inflate the live-tuple count by one.
 *
 * NOTE(review): the commit branch of AtEOXact_PgStat should apply the same
 * live/dead arithmetic so that ordinary commits and COMMIT PREPARED agree.
 */
void
pgstat_twophase_postcommit(TransactionId xid, uint16 info,
                           void *recdata, uint32 len)
{
    TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
    PgStat_TableStatus *pgstat_info;

    /* Find or create a tabstat entry for the rel */
    pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);

    /* net change in live tuples: inserted ones appear, deleted ones vanish */
    pgstat_info->t_counts.t_new_live_tuples +=
        rec->tuples_inserted - rec->tuples_deleted;
    /* every committed deletion leaves one dead tuple behind */
    pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_deleted;
}
/*
* 2PC processing routine for ROLLBACK PREPARED case.
*
* Load the saved counts into our local pgstats state, but treat them
* as aborted.
*/
void
pgstat_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len)
{
TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
PgStat_TableStatus *pgstat_info;
/* Find or create a tabstat entry for the rel */
pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
/* inserted tuples are dead, deleted tuples are no-ops */
pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_inserted;
} }
...@@ -1725,18 +2093,15 @@ pgstat_send(void *msg, int len) ...@@ -1725,18 +2093,15 @@ pgstat_send(void *msg, int len)
void void
pgstat_send_bgwriter(void) pgstat_send_bgwriter(void)
{ {
/* We assume this initializes to zeroes */
static const PgStat_MsgBgWriter all_zeroes;
/* /*
* This function can be called even if nothing at all has happened. * This function can be called even if nothing at all has happened.
* In this case, avoid sending a completely empty message to * In this case, avoid sending a completely empty message to
* the stats collector. * the stats collector.
*/ */
if (BgWriterStats.m_timed_checkpoints == 0 && if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
BgWriterStats.m_requested_checkpoints == 0 &&
BgWriterStats.m_buf_written_checkpoints == 0 &&
BgWriterStats.m_buf_written_lru == 0 &&
BgWriterStats.m_buf_written_all == 0 &&
BgWriterStats.m_maxwritten_lru == 0 &&
BgWriterStats.m_maxwritten_all == 0)
return; return;
/* /*
...@@ -1746,10 +2111,9 @@ pgstat_send_bgwriter(void) ...@@ -1746,10 +2111,9 @@ pgstat_send_bgwriter(void)
pgstat_send(&BgWriterStats, sizeof(BgWriterStats)); pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
/* /*
* Clear out the bgwriter statistics buffer, so it can be * Clear out the statistics buffer, so it can be re-used.
* re-used.
*/ */
memset(&BgWriterStats, 0, sizeof(BgWriterStats)); MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
} }
...@@ -2509,60 +2873,50 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len) ...@@ -2509,60 +2873,50 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
* If it's a new table entry, initialize counters to the values we * If it's a new table entry, initialize counters to the values we
* just got. * just got.
*/ */
tabentry->numscans = tabmsg[i].t_numscans; tabentry->numscans = tabmsg[i].t_counts.t_numscans;
tabentry->tuples_returned = tabmsg[i].t_tuples_returned; tabentry->tuples_returned = tabmsg[i].t_counts.t_tuples_returned;
tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched; tabentry->tuples_fetched = tabmsg[i].t_counts.t_tuples_fetched;
tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted; tabentry->tuples_inserted = tabmsg[i].t_counts.t_tuples_inserted;
tabentry->tuples_updated = tabmsg[i].t_tuples_updated; tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated;
tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted; tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted;
tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples;
tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples;
tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched;
tabentry->blocks_hit = tabmsg[i].t_counts.t_blocks_hit;
tabentry->n_live_tuples = tabmsg[i].t_tuples_inserted;
tabentry->n_dead_tuples = tabmsg[i].t_tuples_updated +
tabmsg[i].t_tuples_deleted;
tabentry->last_anl_tuples = 0; tabentry->last_anl_tuples = 0;
tabentry->vacuum_timestamp = 0; tabentry->vacuum_timestamp = 0;
tabentry->autovac_vacuum_timestamp = 0; tabentry->autovac_vacuum_timestamp = 0;
tabentry->analyze_timestamp = 0; tabentry->analyze_timestamp = 0;
tabentry->autovac_analyze_timestamp = 0; tabentry->autovac_analyze_timestamp = 0;
tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
} }
else else
{ {
/* /*
* Otherwise add the values to the existing entry. * Otherwise add the values to the existing entry.
*/ */
tabentry->numscans += tabmsg[i].t_numscans; tabentry->numscans += tabmsg[i].t_counts.t_numscans;
tabentry->tuples_returned += tabmsg[i].t_tuples_returned; tabentry->tuples_returned += tabmsg[i].t_counts.t_tuples_returned;
tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched; tabentry->tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched;
tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted; tabentry->tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted;
tabentry->tuples_updated += tabmsg[i].t_tuples_updated; tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted; tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples;
tabentry->n_live_tuples += tabmsg[i].t_tuples_inserted - tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples;
tabmsg[i].t_tuples_deleted; tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
tabentry->n_dead_tuples += tabmsg[i].t_tuples_updated + tabentry->blocks_hit += tabmsg[i].t_counts.t_blocks_hit;
tabmsg[i].t_tuples_deleted;
tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched;
tabentry->blocks_hit += tabmsg[i].t_blocks_hit;
} }
/* /*
* Add table stats to the database entry. * Add per-table stats to the per-database entry, too.
*/
dbentry->n_tuples_returned += tabmsg[i].t_tuples_returned;
dbentry->n_tuples_fetched += tabmsg[i].t_tuples_fetched;
dbentry->n_tuples_inserted += tabmsg[i].t_tuples_inserted;
dbentry->n_tuples_updated += tabmsg[i].t_tuples_updated;
dbentry->n_tuples_deleted += tabmsg[i].t_tuples_deleted;
/*
* And add the block IO to the database entry.
*/ */
dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched; dbentry->n_tuples_returned += tabmsg[i].t_counts.t_tuples_returned;
dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit; dbentry->n_tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched;
dbentry->n_tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted;
dbentry->n_tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
dbentry->n_tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
dbentry->n_blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
dbentry->n_blocks_hit += tabmsg[i].t_counts.t_blocks_hit;
} }
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.218 2007/05/02 23:34:48 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.219 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -88,12 +88,6 @@ static bool IsForInput; ...@@ -88,12 +88,6 @@ static bool IsForInput;
/* local state for LockBufferForCleanup */ /* local state for LockBufferForCleanup */
static volatile BufferDesc *PinCountWaitBuf = NULL; static volatile BufferDesc *PinCountWaitBuf = NULL;
/*
* Global statistics for the bgwriter. The contents of this variable
* only makes sense in the bgwriter process.
*/
extern PgStat_MsgBgWriter BgWriterStats;
static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum, static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
bool zeroPage); bool zeroPage);
...@@ -174,7 +168,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage) ...@@ -174,7 +168,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
if (isExtend) if (isExtend)
blockNum = smgrnblocks(reln->rd_smgr); blockNum = smgrnblocks(reln->rd_smgr);
pgstat_count_buffer_read(&reln->pgstat_info, reln); pgstat_count_buffer_read(reln);
if (isLocalBuf) if (isLocalBuf)
{ {
...@@ -204,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage) ...@@ -204,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
if (!isExtend) if (!isExtend)
{ {
/* Just need to update stats before we exit */ /* Just need to update stats before we exit */
pgstat_count_buffer_hit(&reln->pgstat_info, reln); pgstat_count_buffer_hit(reln);
if (VacuumCostActive) if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageHit; VacuumCostBalance += VacuumCostPageHit;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.260 2007/05/02 21:08:46 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.261 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1802,6 +1802,7 @@ RelationClearRelation(Relation relation, bool rebuild) ...@@ -1802,6 +1802,7 @@ RelationClearRelation(Relation relation, bool rebuild)
int old_refcnt = relation->rd_refcnt; int old_refcnt = relation->rd_refcnt;
SubTransactionId old_createSubid = relation->rd_createSubid; SubTransactionId old_createSubid = relation->rd_createSubid;
SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid; SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
struct PgStat_TableStatus *old_pgstat_info = relation->pgstat_info;
TupleDesc old_att = relation->rd_att; TupleDesc old_att = relation->rd_att;
RuleLock *old_rules = relation->rd_rules; RuleLock *old_rules = relation->rd_rules;
MemoryContext old_rulescxt = relation->rd_rulescxt; MemoryContext old_rulescxt = relation->rd_rulescxt;
...@@ -1821,6 +1822,7 @@ RelationClearRelation(Relation relation, bool rebuild) ...@@ -1821,6 +1822,7 @@ RelationClearRelation(Relation relation, bool rebuild)
relation->rd_refcnt = old_refcnt; relation->rd_refcnt = old_refcnt;
relation->rd_createSubid = old_createSubid; relation->rd_createSubid = old_createSubid;
relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid; relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid;
relation->pgstat_info = old_pgstat_info;
if (equalTupleDescs(old_att, relation->rd_att)) if (equalTupleDescs(old_att, relation->rd_att))
{ {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.123 2007/04/08 01:26:33 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -147,10 +147,10 @@ extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); ...@@ -147,10 +147,10 @@ extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
extern bool heap_fetch(Relation relation, Snapshot snapshot, extern bool heap_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf, HeapTuple tuple, Buffer *userbuf, bool keep_buf,
PgStat_Info *pgstat_info); Relation stats_relation);
extern bool heap_release_fetch(Relation relation, Snapshot snapshot, extern bool heap_release_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf, HeapTuple tuple, Buffer *userbuf, bool keep_buf,
PgStat_Info *pgstat_info); Relation stats_relation);
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot, extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid); ItemPointer tid);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.52 2007/01/20 18:43:35 neilc Exp $ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -37,8 +37,6 @@ typedef struct HeapScanDescData ...@@ -37,8 +37,6 @@ typedef struct HeapScanDescData
/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
ItemPointerData rs_mctid; /* marked scan position, if any */ ItemPointerData rs_mctid; /* marked scan position, if any */
PgStat_Info rs_pgstat_info; /* statistics collector hook */
/* these fields only used in page-at-a-time mode */ /* these fields only used in page-at-a-time mode */
int rs_cindex; /* current tuple's index in vistuples */ int rs_cindex; /* current tuple's index in vistuples */
int rs_mindex; /* marked tuple's saved index */ int rs_mindex; /* marked tuple's saved index */
...@@ -78,8 +76,6 @@ typedef struct IndexScanDescData ...@@ -78,8 +76,6 @@ typedef struct IndexScanDescData
HeapTupleData xs_ctup; /* current heap tuple, if any */ HeapTupleData xs_ctup; /* current heap tuple, if any */
Buffer xs_cbuf; /* current heap buffer in scan, if any */ Buffer xs_cbuf; /* current heap buffer in scan, if any */
/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
PgStat_Info xs_pgstat_info; /* statistics collector hook */
} IndexScanDescData; } IndexScanDescData;
typedef IndexScanDescData *IndexScanDesc; typedef IndexScanDescData *IndexScanDesc;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.4 2007/01/05 22:19:51 momjian Exp $ * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.5 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId; ...@@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId;
#define TWOPHASE_RM_INVAL_ID 2 #define TWOPHASE_RM_INVAL_ID 2
#define TWOPHASE_RM_FLATFILES_ID 3 #define TWOPHASE_RM_FLATFILES_ID 3
#define TWOPHASE_RM_NOTIFY_ID 4 #define TWOPHASE_RM_NOTIFY_ID 4
#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_NOTIFY_ID #define TWOPHASE_RM_PGSTAT_ID 5
#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_PGSTAT_ID
extern const TwoPhaseCallback twophase_recover_callbacks[]; extern const TwoPhaseCallback twophase_recover_callbacks[];
extern const TwoPhaseCallback twophase_postcommit_callbacks[]; extern const TwoPhaseCallback twophase_postcommit_callbacks[];
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Copyright (c) 2001-2007, PostgreSQL Global Development Group * Copyright (c) 2001-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/pgstat.h,v 1.58 2007/04/30 16:37:08 tgl Exp $ * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.59 2007/05/27 03:50:39 tgl Exp $
* ---------- * ----------
*/ */
#ifndef PGSTAT_H #ifndef PGSTAT_H
...@@ -40,6 +40,90 @@ typedef enum StatMsgType ...@@ -40,6 +40,90 @@ typedef enum StatMsgType
*/ */
typedef int64 PgStat_Counter; typedef int64 PgStat_Counter;
/* ----------
* PgStat_TableCounts The actual per-table counts kept by a backend
*
* This struct should contain only actual event counters, because we memcmp
* it against zeroes to detect whether there are any counts to transmit.
* It is a component of PgStat_TableStatus (within-backend state) and
* PgStat_TableEntry (the transmitted message format).
*
* Note: for a table, tuples_returned is the number of tuples successfully
* fetched by heap_getnext, while tuples_fetched is the number of tuples
* successfully fetched by heap_fetch under the control of bitmap indexscans.
* For an index, tuples_returned is the number of index entries returned by
* the index AM, while tuples_fetched is the number of tuples successfully
* fetched by heap_fetch under the control of simple indexscans for this index.
*
* tuples_inserted/tuples_updated/tuples_deleted count attempted actions,
* regardless of whether the transaction committed. new_live_tuples and
* new_dead_tuples are properly adjusted depending on commit or abort.
* ----------
*/
/*
 * Every member is a plain PgStat_Counter so that the whole struct can be
 * compared against zeroes to decide whether there is anything to transmit.
 */
typedef struct PgStat_TableCounts
{
PgStat_Counter t_numscans; /* scans started; bumped by pgstat_count_heap_scan / pgstat_count_index_scan, undone by pgstat_discount_heap_scan */
PgStat_Counter t_tuples_returned; /* table: tuples fetched by heap_getnext; index: index entries returned by the AM */
PgStat_Counter t_tuples_fetched; /* tuples successfully fetched by heap_fetch (bitmap scans for a table, simple indexscans for an index) */
PgStat_Counter t_tuples_inserted; /* attempted inserts, counted whether or not the transaction commits */
PgStat_Counter t_tuples_updated; /* attempted updates, counted whether or not the transaction commits */
PgStat_Counter t_tuples_deleted; /* attempted deletes, counted whether or not the transaction commits */
PgStat_Counter t_new_live_tuples; /* live-tuple delta, adjusted depending on commit or abort */
PgStat_Counter t_new_dead_tuples; /* dead-tuple delta, adjusted depending on commit or abort */
PgStat_Counter t_blocks_fetched; /* bumped by pgstat_count_buffer_read */
PgStat_Counter t_blocks_hit; /* bumped by pgstat_count_buffer_hit */
} PgStat_TableCounts;
/* ------------------------------------------------------------
* Structures kept in backend local memory while accumulating counts
* ------------------------------------------------------------
*/
/* ----------
* PgStat_TableStatus Per-table status within a backend
*
* Most of the event counters are nontransactional, ie, we count events
* in committed and aborted transactions alike. For these, we just count
* directly in the PgStat_TableStatus. However, new_live_tuples and
* new_dead_tuples must be derived from tuple insertion and deletion counts
* with awareness of whether the transaction or subtransaction committed or
* aborted. Hence, we also keep a stack of per-(sub)transaction status
* records for every table modified in the current transaction. At commit
* or abort, we propagate tuples_inserted and tuples_deleted up to the
* parent subtransaction level, or out to the parent PgStat_TableStatus,
* as appropriate.
* ----------
*/
/*
 * This is the struct that a Relation's pgstat_info pointer refers to; the
 * inline pgstat_count_* macros increment fields of t_counts through it.
 */
typedef struct PgStat_TableStatus
{
Oid t_id; /* table's OID */
bool t_shared; /* is it a shared catalog? */
struct PgStat_TableXactStatus *trans; /* lowest subxact's counts (entry point into the per-(sub)transaction stack) */
PgStat_TableCounts t_counts; /* event counts to be sent; must stay counters-only, it is memcmp'd against zeroes */
} PgStat_TableStatus;
/* ----------
* PgStat_TableXactStatus Per-table, per-subtransaction status
* ----------
*/
/*
 * One record per table per (sub)transaction level.  Only insertions and
 * deletions are tracked here, since those are the inputs from which
 * new_live_tuples and new_dead_tuples are derived at commit or abort.
 */
typedef struct PgStat_TableXactStatus
{
PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */
int nest_level; /* subtransaction nest level */
/* links to other structs for same relation: */
struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
PgStat_TableStatus *parent; /* per-table status */
/* structs of same subxact level are linked here: */
struct PgStat_TableXactStatus *next; /* next of same subxact */
} PgStat_TableXactStatus;
/* ------------------------------------------------------------ /* ------------------------------------------------------------
* Message formats follow * Message formats follow
...@@ -78,30 +162,12 @@ typedef struct PgStat_MsgDummy ...@@ -78,30 +162,12 @@ typedef struct PgStat_MsgDummy
/* ---------- /* ----------
* PgStat_TableEntry Per-table info in a MsgTabstat * PgStat_TableEntry Per-table info in a MsgTabstat
*
* Note: for a table, tuples_returned is the number of tuples successfully
* fetched by heap_getnext, while tuples_fetched is the number of tuples
* successfully fetched by heap_fetch under the control of bitmap indexscans.
* For an index, tuples_returned is the number of index entries returned by
* the index AM, while tuples_fetched is the number of tuples successfully
* fetched by heap_fetch under the control of simple indexscans for this index.
* ---------- * ----------
*/ */
typedef struct PgStat_TableEntry typedef struct PgStat_TableEntry
{ {
Oid t_id; Oid t_id;
PgStat_TableCounts t_counts;
PgStat_Counter t_numscans;
PgStat_Counter t_tuples_returned;
PgStat_Counter t_tuples_fetched;
PgStat_Counter t_tuples_inserted;
PgStat_Counter t_tuples_updated;
PgStat_Counter t_tuples_deleted;
PgStat_Counter t_blocks_fetched;
PgStat_Counter t_blocks_hit;
} PgStat_TableEntry; } PgStat_TableEntry;
/* ---------- /* ----------
...@@ -393,6 +459,10 @@ extern bool pgstat_collect_tuplelevel; ...@@ -393,6 +459,10 @@ extern bool pgstat_collect_tuplelevel;
extern bool pgstat_collect_blocklevel; extern bool pgstat_collect_blocklevel;
extern bool pgstat_collect_querystring; extern bool pgstat_collect_querystring;
/*
* BgWriter statistics counters are updated directly by bgwriter and bufmgr
*/
extern PgStat_MsgBgWriter BgWriterStats;
/* ---------- /* ----------
* Functions called from postmaster * Functions called from postmaster
...@@ -436,83 +506,67 @@ extern void pgstat_report_activity(const char *what); ...@@ -436,83 +506,67 @@ extern void pgstat_report_activity(const char *what);
extern void pgstat_report_txn_timestamp(TimestampTz tstamp); extern void pgstat_report_txn_timestamp(TimestampTz tstamp);
extern void pgstat_report_waiting(bool waiting); extern void pgstat_report_waiting(bool waiting);
extern void pgstat_initstats(PgStat_Info *stats, Relation rel); extern void pgstat_initstats(Relation rel);
/* nontransactional event counts are simple enough to inline */
#define pgstat_count_heap_scan(s) \ #define pgstat_count_heap_scan(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \ (rel)->pgstat_info->t_counts.t_numscans++; \
} while (0) } while (0)
/* kluge for bitmap scans: */ /* kluge for bitmap scans: */
#define pgstat_discount_heap_scan(s) \ #define pgstat_discount_heap_scan(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_numscans--; \ (rel)->pgstat_info->t_counts.t_numscans--; \
} while (0) } while (0)
#define pgstat_count_heap_getnext(s) \ #define pgstat_count_heap_getnext(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned++; \ (rel)->pgstat_info->t_counts.t_tuples_returned++; \
} while (0) } while (0)
#define pgstat_count_heap_fetch(s) \ #define pgstat_count_heap_fetch(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_tuples_fetched++; \ (rel)->pgstat_info->t_counts.t_tuples_fetched++; \
} while (0) } while (0)
#define pgstat_count_heap_insert(s) \ #define pgstat_count_index_scan(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_tuples_inserted++; \ (rel)->pgstat_info->t_counts.t_numscans++; \
} while (0) } while (0)
#define pgstat_count_heap_update(s) \ #define pgstat_count_index_tuples(rel, n) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_tuples_updated++; \ (rel)->pgstat_info->t_counts.t_tuples_returned += (n); \
} while (0) } while (0)
#define pgstat_count_heap_delete(s) \ #define pgstat_count_buffer_read(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_tuples_deleted++; \ (rel)->pgstat_info->t_counts.t_blocks_fetched++; \
} while (0) } while (0)
#define pgstat_count_index_scan(s) \ #define pgstat_count_buffer_hit(rel) \
do { \ do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \ (rel)->pgstat_info->t_counts.t_blocks_hit++; \
} while (0)
#define pgstat_count_index_tuples(s, n) \
do { \
if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned += (n); \
} while (0)
#define pgstat_count_buffer_read(s,r) \
do { \
if (pgstat_collect_blocklevel) { \
if ((s)->tabentry != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \
else { \
pgstat_initstats((s), (r)); \
if ((s)->tabentry != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \
} \
} \
} while (0)
#define pgstat_count_buffer_hit(s,r) \
do { \
if (pgstat_collect_blocklevel) { \
if ((s)->tabentry != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \
else { \
pgstat_initstats((s), (r)); \
if ((s)->tabentry != NULL) \
((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \
} \
} \
} while (0) } while (0)
/*
 * Insert/update/delete counting is done by real functions rather than by
 * inline macros like the nontransactional counters.
 * NOTE(review): presumably because these also feed the per-(sub)transaction
 * insert/delete bookkeeping (PgStat_TableXactStatus) -- confirm in pgstat.c.
 */
extern void pgstat_count_heap_insert(Relation rel);
extern void pgstat_count_heap_update(Relation rel);
extern void pgstat_count_heap_delete(Relation rel);
/*
 * Transaction and subtransaction lifecycle hooks.
 * NOTE(review): isCommit presumably distinguishes commit from abort and
 * nestDepth the subtransaction level being ended -- confirm against callers.
 */
extern void AtEOXact_PgStat(bool isCommit);
extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
extern void AtPrepare_PgStat(void);
extern void PostPrepare_PgStat(void);
/*
 * Two-phase commit callbacks for prepared transactions.
 * NOTE(review): presumably registered under TWOPHASE_RM_PGSTAT_ID in the
 * twophase_*_callbacks tables -- confirm in twophase_rmgr.c.
 */
extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
void *recdata, uint32 len);
extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
void *recdata, uint32 len);
/* Per-backend transaction outcome counters. */
extern void pgstat_count_xact_commit(void);
extern void pgstat_count_xact_rollback(void);
extern void pgstat_send_bgwriter(void); extern void pgstat_send_bgwriter(void);
/* ---------- /* ----------
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.100 2007/03/29 00:15:39 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.101 2007/05/27 03:50:39 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -89,15 +89,6 @@ typedef struct TriggerDesc ...@@ -89,15 +89,6 @@ typedef struct TriggerDesc
} TriggerDesc; } TriggerDesc;
/*
* Same for the statistics collector data in Relation and scan data.
*/
typedef struct PgStat_Info
{
void *tabentry;
} PgStat_Info;
/* /*
* Cached lookup information for the index access method functions defined * Cached lookup information for the index access method functions defined
* by the pg_am row associated with an index relation. * by the pg_am row associated with an index relation.
...@@ -200,8 +191,8 @@ typedef struct RelationData ...@@ -200,8 +191,8 @@ typedef struct RelationData
List *rd_indpred; /* index predicate tree, if any */ List *rd_indpred; /* index predicate tree, if any */
void *rd_amcache; /* available for use by index AM */ void *rd_amcache; /* available for use by index AM */
/* statistics collection area */ /* use "struct" here to avoid needing to include pgstat.h: */
PgStat_Info pgstat_info; struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData; } RelationData;
typedef RelationData *Relation; typedef RelationData *Relation;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment