Commit 7c3e8039 authored by Kevin Grittner's avatar Kevin Grittner

Add a few entries to the tail of time mapping, to see old values.

Without a few entries beyond old_snapshot_threshold, the lookup
would often fail, resulting in the more aggressive pruning or
vacuum being skipped often enough to matter.  This was very clearly
shown by a python test script posted by Ants Aasma, and was likely
a factor in an earlier but somewhat less clear-cut test case posted
by Jeff Janes.

This patch makes no change to the logic, per se -- it just makes
the array of mapping entries big enough to make lookup misses based
on timing much less likely.  An occasional miss is still possible
if a thread stalls for more than 10 minutes, but that does not
create any problem with correctness of behavior.  Besides, if
things are so busy that a thread is stalling for more than 10
minutes, it is probably OK to skip the more aggressive cleanup at
that particular point in time.
parent d34e7b28
...@@ -92,10 +92,10 @@ typedef struct OldSnapshotControlData ...@@ -92,10 +92,10 @@ typedef struct OldSnapshotControlData
* Use a circular buffer with a head offset, a count of entries currently * Use a circular buffer with a head offset, a count of entries currently
* used, and a timestamp corresponding to the xid at the head offset. A * used, and a timestamp corresponding to the xid at the head offset. A
* count_used value of zero means that there are no times stored; a * count_used value of zero means that there are no times stored; a
* count_used value of old_snapshot_threshold means that the buffer is * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
* full and the head must be advanced to add new entries. Use timestamps * is full and the head must be advanced to add new entries. Use
* aligned to minute boundaries, since that seems less surprising than * timestamps aligned to minute boundaries, since that seems less
* aligning based on the first usage timestamp. * surprising than aligning based on the first usage timestamp.
* *
* It is OK if the xid for a given time slot is from earlier than * It is OK if the xid for a given time slot is from earlier than
* calculated by adding the number of minutes corresponding to the * calculated by adding the number of minutes corresponding to the
...@@ -243,7 +243,7 @@ SnapMgrShmemSize(void) ...@@ -243,7 +243,7 @@ SnapMgrShmemSize(void)
size = offsetof(OldSnapshotControlData, xid_by_minute); size = offsetof(OldSnapshotControlData, xid_by_minute);
if (old_snapshot_threshold > 0) if (old_snapshot_threshold > 0)
size = add_size(size, mul_size(sizeof(TransactionId), size = add_size(size, mul_size(sizeof(TransactionId),
old_snapshot_threshold)); OLD_SNAPSHOT_TIME_MAP_ENTRIES));
return size; return size;
} }
...@@ -1643,7 +1643,7 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, ...@@ -1643,7 +1643,7 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
if (offset > oldSnapshotControl->count_used - 1) if (offset > oldSnapshotControl->count_used - 1)
offset = oldSnapshotControl->count_used - 1; offset = oldSnapshotControl->count_used - 1;
offset = (oldSnapshotControl->head_offset + offset) offset = (oldSnapshotControl->head_offset + offset)
% old_snapshot_threshold; % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
xlimit = oldSnapshotControl->xid_by_minute[offset]; xlimit = oldSnapshotControl->xid_by_minute[offset];
if (NormalTransactionIdFollows(xlimit, recentXmin)) if (NormalTransactionIdFollows(xlimit, recentXmin))
...@@ -1720,10 +1720,10 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin) ...@@ -1720,10 +1720,10 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin)
LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE); LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
Assert(oldSnapshotControl->head_offset >= 0); Assert(oldSnapshotControl->head_offset >= 0);
Assert(oldSnapshotControl->head_offset < old_snapshot_threshold); Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0); Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
Assert(oldSnapshotControl->count_used >= 0); Assert(oldSnapshotControl->count_used >= 0);
Assert(oldSnapshotControl->count_used <= old_snapshot_threshold); Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
if (oldSnapshotControl->count_used == 0) if (oldSnapshotControl->count_used == 0)
{ {
...@@ -1750,7 +1750,7 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin) ...@@ -1750,7 +1750,7 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin)
int bucket = (oldSnapshotControl->head_offset int bucket = (oldSnapshotControl->head_offset
+ ((ts - oldSnapshotControl->head_timestamp) + ((ts - oldSnapshotControl->head_timestamp)
/ USECS_PER_MINUTE)) / USECS_PER_MINUTE))
% old_snapshot_threshold; % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin)) if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
oldSnapshotControl->xid_by_minute[bucket] = xmin; oldSnapshotControl->xid_by_minute[bucket] = xmin;
...@@ -1763,7 +1763,7 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin) ...@@ -1763,7 +1763,7 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin)
oldSnapshotControl->head_timestamp = ts; oldSnapshotControl->head_timestamp = ts;
if (advance >= old_snapshot_threshold) if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
{ {
/* Advance is so far that all old data is junk; start over. */ /* Advance is so far that all old data is junk; start over. */
oldSnapshotControl->head_offset = 0; oldSnapshotControl->head_offset = 0;
...@@ -1777,12 +1777,12 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin) ...@@ -1777,12 +1777,12 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin)
for (i = 0; i < advance; i++) for (i = 0; i < advance; i++)
{ {
if (oldSnapshotControl->count_used == old_snapshot_threshold) if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
{ {
/* Map full and new value replaces old head. */ /* Map full and new value replaces old head. */
int old_head = oldSnapshotControl->head_offset; int old_head = oldSnapshotControl->head_offset;
if (old_head == (old_snapshot_threshold - 1)) if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
oldSnapshotControl->head_offset = 0; oldSnapshotControl->head_offset = 0;
else else
oldSnapshotControl->head_offset = old_head + 1; oldSnapshotControl->head_offset = old_head + 1;
...@@ -1793,7 +1793,7 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin) ...@@ -1793,7 +1793,7 @@ MaintainOldSnapshotTimeMapping(int64 whenTaken, TransactionId xmin)
/* Extend map to unused entry. */ /* Extend map to unused entry. */
int new_tail = (oldSnapshotControl->head_offset int new_tail = (oldSnapshotControl->head_offset
+ oldSnapshotControl->count_used) + oldSnapshotControl->count_used)
% old_snapshot_threshold; % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
oldSnapshotControl->count_used++; oldSnapshotControl->count_used++;
oldSnapshotControl->xid_by_minute[new_tail] = xmin; oldSnapshotControl->xid_by_minute[new_tail] = xmin;
......
...@@ -19,6 +19,19 @@ ...@@ -19,6 +19,19 @@
#include "utils/snapshot.h" #include "utils/snapshot.h"
/*
* The structure used to map times to TransactionId values for the "snapshot
* too old" feature must have a few entries at the tail to hold old values;
* otherwise the lookup will often fail and the expected early pruning or
* vacuum will not usually occur. It is best if this padding is for a number
* of minutes greater than a thread would normally be stalled, but it's OK if
* early vacuum opportunities are occasionally missed, so there's no need to
* use an extreme value or get too fancy. 10 minutes seems plenty.
*/
#define OLD_SNAPSHOT_PADDING_ENTRIES 10
#define OLD_SNAPSHOT_TIME_MAP_ENTRIES (old_snapshot_threshold + OLD_SNAPSHOT_PADDING_ENTRIES)
/* GUC variables */ /* GUC variables */
extern PGDLLIMPORT int old_snapshot_threshold; extern PGDLLIMPORT int old_snapshot_threshold;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment