Commit 5d00b764 authored by Tom Lane's avatar Tom Lane

Make pgstat tabstat lookup hash table less fragile.

Code review for commit 090010f2.

Fix cases where an elog(ERROR) partway through a function would leave the
persistent data structures in a corrupt state.  pgstat_report_stat got this
wrong by invalidating PgStat_TableEntry structs before removing hashtable
entries pointing to them, and get_tabstat_entry got it wrong by ignoring
the possibility of palloc failure after it had already created a hashtable
entry.

Also, avoid leaking a memory context per transaction, which the previous
code did through misunderstanding hash_create's API.  We do not need to
create a context to hold the hash table; hash_create will do that.
(The leak wasn't that large, amounting to only a memory context header
per iteration, but it's still surprising that nobody noticed it yet.)
parent b91e5b46
...@@ -174,7 +174,7 @@ typedef struct TabStatusArray ...@@ -174,7 +174,7 @@ typedef struct TabStatusArray
static TabStatusArray *pgStatTabList = NULL; static TabStatusArray *pgStatTabList = NULL;
/* /*
* pgStatTabHash entry * pgStatTabHash entry: map from relation OID to PgStat_TableStatus pointer
*/ */
typedef struct TabStatHashEntry typedef struct TabStatHashEntry
{ {
...@@ -805,6 +805,17 @@ pgstat_report_stat(bool force) ...@@ -805,6 +805,17 @@ pgstat_report_stat(bool force)
return; return;
last_report = now; last_report = now;
/*
* Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
* entries it points to. (Should we fail partway through the loop below,
* it's okay to have removed the hashtable already --- the only
* consequence is we'd get multiple entries for the same table in the
* pgStatTabList, and that's safe.)
*/
if (pgStatTabHash)
hash_destroy(pgStatTabHash);
pgStatTabHash = NULL;
/* /*
* Scan through the TabStatusArray struct(s) to find tables that actually * Scan through the TabStatusArray struct(s) to find tables that actually
* have counts, and build messages to send. We have to separate shared * have counts, and build messages to send. We have to separate shared
...@@ -855,14 +866,6 @@ pgstat_report_stat(bool force) ...@@ -855,14 +866,6 @@ pgstat_report_stat(bool force)
tsa->tsa_used = 0; tsa->tsa_used = 0;
} }
/*
* pgStatTabHash is outdated on this point so we have to clean it,
* hash_destroy() will remove hash memory context, allocated in
* make_sure_stat_tab_initialized()
*/
hash_destroy(pgStatTabHash);
pgStatTabHash = NULL;
/* /*
* Send partial messages. Make sure that any pending xact commit/abort * Send partial messages. Make sure that any pending xact commit/abort
* gets counted, even if there are no table stats to send. * gets counted, even if there are no table stats to send.
...@@ -1707,41 +1710,6 @@ pgstat_initstats(Relation rel) ...@@ -1707,41 +1710,6 @@ pgstat_initstats(Relation rel)
rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared); rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
} }
/*
* Make sure pgStatTabList and pgStatTabHash are initialized.
*/
static void
make_sure_stat_tab_initialized()
{
HASHCTL ctl;
MemoryContext new_ctx;
if(!pgStatTabList)
{
/* This is first time procedure is called */
pgStatTabList = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
sizeof(TabStatusArray));
}
if(pgStatTabHash)
return;
/* Hash table was freed or never existed. */
new_ctx = AllocSetContextCreate(
TopMemoryContext,
"PGStatLookupHashTableContext",
ALLOCSET_DEFAULT_SIZES);
memset(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(TabStatHashEntry);
ctl.hcxt = new_ctx;
pgStatTabHash = hash_create("pgstat t_id to tsa_entry lookup hash table",
TABSTAT_QUANTUM, &ctl, HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
}
/* /*
* get_tabstat_entry - find or create a PgStat_TableStatus entry for rel * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
*/ */
...@@ -1753,42 +1721,75 @@ get_tabstat_entry(Oid rel_id, bool isshared) ...@@ -1753,42 +1721,75 @@ get_tabstat_entry(Oid rel_id, bool isshared)
TabStatusArray *tsa; TabStatusArray *tsa;
bool found; bool found;
make_sure_stat_tab_initialized(); /*
* Create hash table if we don't have it already.
*/
if (pgStatTabHash == NULL)
{
HASHCTL ctl;
memset(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(Oid);
ctl.entrysize = sizeof(TabStatHashEntry);
pgStatTabHash = hash_create("pgstat TabStatusArray lookup hash table",
TABSTAT_QUANTUM,
&ctl,
HASH_ELEM | HASH_BLOBS);
}
/* /*
* Find an entry or create a new one. * Find an entry or create a new one.
*/ */
hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found); hash_entry = hash_search(pgStatTabHash, &rel_id, HASH_ENTER, &found);
if(found) if (!found)
{
/* initialize new entry with null pointer */
hash_entry->tsa_entry = NULL;
}
/*
* If entry is already valid, we're done.
*/
if (hash_entry->tsa_entry)
return hash_entry->tsa_entry; return hash_entry->tsa_entry;
/* /*
* `hash_entry` was just created and now we have to fill it. * Locate the first pgStatTabList entry with free space, making a new list
* First make sure there is a free space in a last element of pgStatTabList. * entry if needed. Note that we could get an OOM failure here, but if so
* we have left the hashtable and the list in a consistent state.
*/ */
tsa = pgStatTabList; if (pgStatTabList == NULL)
while(tsa->tsa_used == TABSTAT_QUANTUM)
{ {
if(tsa->tsa_next == NULL) /* Set up first pgStatTabList entry */
{ pgStatTabList = (TabStatusArray *)
tsa->tsa_next = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext, MemoryContextAllocZero(TopMemoryContext,
sizeof(TabStatusArray)); sizeof(TabStatusArray));
} }
tsa = pgStatTabList;
while (tsa->tsa_used >= TABSTAT_QUANTUM)
{
if (tsa->tsa_next == NULL)
tsa->tsa_next = (TabStatusArray *)
MemoryContextAllocZero(TopMemoryContext,
sizeof(TabStatusArray));
tsa = tsa->tsa_next; tsa = tsa->tsa_next;
} }
/* /*
* Add an entry. * Allocate a PgStat_TableStatus entry within this list entry. We assume
* the entry was already zeroed, either at creation or after last use.
*/ */
entry = &tsa->tsa_entries[tsa->tsa_used++]; entry = &tsa->tsa_entries[tsa->tsa_used++];
entry->t_id = rel_id; entry->t_id = rel_id;
entry->t_shared = isshared; entry->t_shared = isshared;
/* /*
* Add a corresponding entry to pgStatTabHash. * Now we can fill the entry in pgStatTabHash.
*/ */
hash_entry->tsa_entry = entry; hash_entry->tsa_entry = entry;
return entry; return entry;
} }
...@@ -1796,15 +1797,17 @@ get_tabstat_entry(Oid rel_id, bool isshared) ...@@ -1796,15 +1797,17 @@ get_tabstat_entry(Oid rel_id, bool isshared)
* find_tabstat_entry - find any existing PgStat_TableStatus entry for rel * find_tabstat_entry - find any existing PgStat_TableStatus entry for rel
* *
* If no entry, return NULL, don't create a new one * If no entry, return NULL, don't create a new one
*
* Note: if we got an error in the most recent execution of pgstat_report_stat,
* it's possible that an entry exists but there's no hashtable entry for it.
* That's okay, we'll treat this case as "doesn't exist".
*/ */
PgStat_TableStatus * PgStat_TableStatus *
find_tabstat_entry(Oid rel_id) find_tabstat_entry(Oid rel_id)
{ {
TabStatHashEntry* hash_entry; TabStatHashEntry* hash_entry;
/* /* If hashtable doesn't exist, there are no entries at all */
* There are no entries at all.
*/
if(!pgStatTabHash) if(!pgStatTabHash)
return NULL; return NULL;
...@@ -1812,6 +1815,7 @@ find_tabstat_entry(Oid rel_id) ...@@ -1812,6 +1815,7 @@ find_tabstat_entry(Oid rel_id)
if(!hash_entry) if(!hash_entry)
return NULL; return NULL;
/* Note that this step could also return NULL, but that's correct */
return hash_entry->tsa_entry; return hash_entry->tsa_entry;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment