Commit 39b03690 authored by Fujii Masao's avatar Fujii Masao

Log long wait time on recovery conflict when it's resolved.

This is a follow-up of the work done in commit 0650ff23. This commit
extends log_recovery_conflict_waits so that a log message is produced
also when recovery conflict has already been resolved after deadlock_timeout
passes, i.e., when the startup process finishes waiting for recovery
conflict after deadlock_timeout. This is useful in investigating how long
recovery conflicts prevented the recovery from applying WAL.

Author: Fujii Masao
Reviewed-by: Kyotaro Horiguchi, Bertrand Drouvot
Discussion: https://postgr.es/m/9a60178c-a853-1440-2cdc-c3af916cff59@amazon.com
parent 6ecaaf81
...@@ -6961,7 +6961,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' ...@@ -6961,7 +6961,7 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
<listitem> <listitem>
<para> <para>
Controls whether a log message is produced when the startup process Controls whether a log message is produced when the startup process
is waiting longer than <varname>deadlock_timeout</varname> waits longer than <varname>deadlock_timeout</varname>
for recovery conflicts. This is useful in determining if recovery for recovery conflicts. This is useful in determining if recovery
conflicts prevent the recovery from applying WAL. conflicts prevent the recovery from applying WAL.
</para> </para>
......
...@@ -4034,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer) ...@@ -4034,6 +4034,16 @@ LockBufferForCleanup(Buffer buffer)
/* Successfully acquired exclusive lock with pincount 1 */ /* Successfully acquired exclusive lock with pincount 1 */
UnlockBufHdr(bufHdr, buf_state); UnlockBufHdr(bufHdr, buf_state);
/*
* Emit the log message if recovery conflict on buffer pin was
* resolved but the startup process waited longer than
* deadlock_timeout for it.
*/
if (logged_recovery_conflict)
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
waitStart, GetCurrentTimestamp(),
NULL, false);
/* Report change to non-waiting status */ /* Report change to non-waiting status */
if (new_status) if (new_status)
{ {
...@@ -4088,7 +4098,7 @@ LockBufferForCleanup(Buffer buffer) ...@@ -4088,7 +4098,7 @@ LockBufferForCleanup(Buffer buffer)
DeadlockTimeout)) DeadlockTimeout))
{ {
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN, LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
waitStart, now, NULL); waitStart, now, NULL, true);
logged_recovery_conflict = true; logged_recovery_conflict = true;
} }
} }
......
...@@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info) ...@@ -226,11 +226,14 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
* wait_start is the timestamp when the caller started to wait. * wait_start is the timestamp when the caller started to wait.
* now is the timestamp when this function has been called. * now is the timestamp when this function has been called.
* wait_list is the list of virtual transaction ids assigned to * wait_list is the list of virtual transaction ids assigned to
* conflicting processes. * conflicting processes. still_waiting indicates whether
* the startup process is still waiting for the recovery conflict
* to be resolved or not.
*/ */
void void
LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
TimestampTz now, VirtualTransactionId *wait_list) TimestampTz now, VirtualTransactionId *wait_list,
bool still_waiting)
{ {
long secs; long secs;
int usecs; int usecs;
...@@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, ...@@ -238,6 +241,12 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
StringInfoData buf; StringInfoData buf;
int nprocs = 0; int nprocs = 0;
/*
* There must be no conflicting processes when the recovery conflict has
* already been resolved.
*/
Assert(still_waiting || wait_list == NULL);
TimestampDifference(wait_start, now, &secs, &usecs); TimestampDifference(wait_start, now, &secs, &usecs);
msecs = secs * 1000 + usecs / 1000; msecs = secs * 1000 + usecs / 1000;
usecs = usecs % 1000; usecs = usecs % 1000;
...@@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, ...@@ -275,12 +284,21 @@ LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
* conflicting backends in a detail message. Note that if all the backends * conflicting backends in a detail message. Note that if all the backends
* in the list are not active, no detail message is logged. * in the list are not active, no detail message is logged.
*/ */
ereport(LOG, if (still_waiting)
errmsg("recovery still waiting after %ld.%03d ms: %s", {
msecs, usecs, _(get_recovery_conflict_desc(reason))), ereport(LOG,
nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.", errmsg("recovery still waiting after %ld.%03d ms: %s",
"Conflicting processes: %s.", msecs, usecs, _(get_recovery_conflict_desc(reason))),
nprocs, buf.data) : 0); nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
"Conflicting processes: %s.",
nprocs, buf.data) : 0);
}
else
{
ereport(LOG,
errmsg("recovery finished waiting after %ld.%03d ms: %s",
msecs, usecs, _(get_recovery_conflict_desc(reason))));
}
if (nprocs > 0) if (nprocs > 0)
pfree(buf.data); pfree(buf.data);
...@@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ...@@ -375,13 +393,12 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
/* /*
* Emit the log message if the startup process is waiting * Emit the log message if the startup process is waiting
* longer than deadlock_timeout for recovery conflict on * longer than deadlock_timeout for recovery conflict.
* buffer pin.
*/ */
if (maybe_log_conflict && if (maybe_log_conflict &&
TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout)) TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
{ {
LogRecoveryConflict(reason, waitStart, now, waitlist); LogRecoveryConflict(reason, waitStart, now, waitlist, true);
logged_recovery_conflict = true; logged_recovery_conflict = true;
} }
} }
...@@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist, ...@@ -391,6 +408,14 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
waitlist++; waitlist++;
} }
/*
* Emit the log message if recovery conflict was resolved but the startup
* process waited longer than deadlock_timeout for it.
*/
if (logged_recovery_conflict)
LogRecoveryConflict(reason, waitStart, GetCurrentTimestamp(),
NULL, false);
/* Reset ps display if we changed it */ /* Reset ps display if we changed it */
if (new_status) if (new_status)
{ {
......
...@@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -1322,7 +1322,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
* longer than deadlock_timeout. * longer than deadlock_timeout.
*/ */
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK, LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
standbyWaitStart, now, cnt > 0 ? vxids : NULL); standbyWaitStart, now,
cnt > 0 ? vxids : NULL, true);
logged_recovery_conflict = true; logged_recovery_conflict = true;
} }
} }
...@@ -1607,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -1607,6 +1608,15 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
disable_timeout(DEADLOCK_TIMEOUT, false); disable_timeout(DEADLOCK_TIMEOUT, false);
} }
/*
* Emit the log message if recovery conflict on lock was resolved but the
* startup process waited longer than deadlock_timeout for it.
*/
if (InHotStandby && logged_recovery_conflict)
LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
standbyWaitStart, GetCurrentTimestamp(),
NULL, false);
/* /*
* Re-acquire the lock table's partition lock. We have to do this to hold * Re-acquire the lock table's partition lock. We have to do this to hold
* off cancel/die interrupts before we can mess with lockAwaited (else we * off cancel/die interrupts before we can mess with lockAwaited (else we
......
...@@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void); ...@@ -40,7 +40,8 @@ extern void StandbyDeadLockHandler(void);
extern void StandbyTimeoutHandler(void); extern void StandbyTimeoutHandler(void);
extern void StandbyLockTimeoutHandler(void); extern void StandbyLockTimeoutHandler(void);
extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start, extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
TimestampTz cur_ts, VirtualTransactionId *wait_list); TimestampTz cur_ts, VirtualTransactionId *wait_list,
bool still_waiting);
/* /*
* Standby Rmgr (RM_STANDBY_ID) * Standby Rmgr (RM_STANDBY_ID)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment