Commit 3b733fcd authored by Fujii Masao's avatar Fujii Masao

Display the time when the process started waiting for the lock, in pg_locks.

This commit adds new column "waitstart" into pg_locks view. This column
reports the time when the server process started waiting for the lock
if the lock is not held. This information is useful, for example, when
examining the amount of time to wait on a lock by subtracting
"waitstart" in pg_locks from the current time, and identify the lock
that the processes are waiting for very long.

This feature uses the current time obtained for the deadlock timeout
timer as "waitstart" (i.e., the time when this process started waiting
for the lock). Since getting the current time newly can cause overhead,
we reuse the already-obtained time to avoid that overhead.

Note that "waitstart" is updated without holding the lock table's
partition lock, to avoid the overhead by additional lock acquisition.
This can cause "waitstart" in pg_locks to become NULL for a very short
period of time after the wait started even though "granted" is false.
This is OK in practice because we can assume that users are likely to
look at "waitstart" when waiting for the lock for a long time.

Bump catalog version.

Author: Atsushi Torikoshi
Reviewed-by: Ian Lawrence Barwick, Robert Haas, Justin Pryzby, Fujii Masao
Discussion: https://postgr.es/m/a96013dc51cdc56b2a2b84fa8a16a993@oss.nttdata.com
parent 7cb3048f
...@@ -97,8 +97,8 @@ SELECT bt_index_parent_check('bttest_b_idx'); ...@@ -97,8 +97,8 @@ SELECT bt_index_parent_check('bttest_b_idx');
SELECT * FROM pg_locks SELECT * FROM pg_locks
WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx']::regclass[]) WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx']::regclass[])
AND pid = pg_backend_pid(); AND pid = pg_backend_pid();
locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode | granted | fastpath locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode | granted | fastpath | waitstart
----------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+-----+------+---------+---------- ----------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+-----+------+---------+----------+-----------
(0 rows) (0 rows)
COMMIT; COMMIT;
......
...@@ -10604,6 +10604,19 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l ...@@ -10604,6 +10604,19 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
lock table lock table
</para></entry> </para></entry>
</row> </row>
<row>
<entry role="catalog_table_entry"><para role="column_definition">
<structfield>waitstart</structfield> <type>timestamptz</type>
</para>
<para>
Time when the server process started waiting for this lock,
or null if the lock is held.
Note that this can be null for a very short period of time after
the wait started even though <structfield>granted</structfield>
is <literal>false</literal>.
</para></entry>
</row>
</tbody> </tbody>
</tgroup> </tgroup>
</table> </table>
......
...@@ -540,12 +540,34 @@ void ...@@ -540,12 +540,34 @@ void
ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict) ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
{ {
TimestampTz ltime; TimestampTz ltime;
TimestampTz now;
Assert(InHotStandby); Assert(InHotStandby);
ltime = GetStandbyLimitTime(); ltime = GetStandbyLimitTime();
now = GetCurrentTimestamp();
if (GetCurrentTimestamp() >= ltime && ltime != 0) /*
* Update waitStart if first time through after the startup process
* started waiting for the lock. It should not be updated every time
* ResolveRecoveryConflictWithLock() is called during the wait.
*
* Use the current time obtained for comparison with ltime as waitStart
* (i.e., the time when this process started waiting for the lock). Since
* getting the current time newly can cause overhead, we reuse the
* already-obtained time to avoid that overhead.
*
* Note that waitStart is updated without holding the lock table's
* partition lock, to avoid the overhead by additional lock acquisition.
* This can cause "waitstart" in pg_locks to become NULL for a very short
* period of time after the wait started even though "granted" is false.
* This is OK in practice because we can assume that users are likely to
* look at "waitstart" when waiting for the lock for a long time.
*/
if (pg_atomic_read_u64(&MyProc->waitStart) == 0)
pg_atomic_write_u64(&MyProc->waitStart, now);
if (now >= ltime && ltime != 0)
{ {
/* /*
* We're already behind, so clear a path as quickly as possible. * We're already behind, so clear a path as quickly as possible.
......
...@@ -3619,6 +3619,12 @@ GetLockStatusData(void) ...@@ -3619,6 +3619,12 @@ GetLockStatusData(void)
instance->leaderPid = proc->pid; instance->leaderPid = proc->pid;
instance->fastpath = true; instance->fastpath = true;
/*
* Successfully taking fast path lock means there were no
* conflicting locks.
*/
instance->waitStart = 0;
el++; el++;
} }
...@@ -3646,6 +3652,7 @@ GetLockStatusData(void) ...@@ -3646,6 +3652,7 @@ GetLockStatusData(void)
instance->pid = proc->pid; instance->pid = proc->pid;
instance->leaderPid = proc->pid; instance->leaderPid = proc->pid;
instance->fastpath = true; instance->fastpath = true;
instance->waitStart = 0;
el++; el++;
} }
...@@ -3698,6 +3705,7 @@ GetLockStatusData(void) ...@@ -3698,6 +3705,7 @@ GetLockStatusData(void)
instance->pid = proc->pid; instance->pid = proc->pid;
instance->leaderPid = proclock->groupLeader->pid; instance->leaderPid = proclock->groupLeader->pid;
instance->fastpath = false; instance->fastpath = false;
instance->waitStart = (TimestampTz) pg_atomic_read_u64(&proc->waitStart);
el++; el++;
} }
......
...@@ -402,6 +402,7 @@ InitProcess(void) ...@@ -402,6 +402,7 @@ InitProcess(void)
MyProc->lwWaitMode = 0; MyProc->lwWaitMode = 0;
MyProc->waitLock = NULL; MyProc->waitLock = NULL;
MyProc->waitProcLock = NULL; MyProc->waitProcLock = NULL;
pg_atomic_init_u64(&MyProc->waitStart, 0);
#ifdef USE_ASSERT_CHECKING #ifdef USE_ASSERT_CHECKING
{ {
int i; int i;
...@@ -1262,6 +1263,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) ...@@ -1262,6 +1263,23 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
} }
else else
enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
/*
* Use the current time obtained for the deadlock timeout timer as
* waitStart (i.e., the time when this process started waiting for the
* lock). Since getting the current time newly can cause overhead, we
* reuse the already-obtained time to avoid that overhead.
*
* Note that waitStart is updated without holding the lock table's
* partition lock, to avoid the overhead by additional lock
* acquisition. This can cause "waitstart" in pg_locks to become NULL
* for a very short period of time after the wait started even though
* "granted" is false. This is OK in practice because we can assume
* that users are likely to look at "waitstart" when waiting for the
* lock for a long time.
*/
pg_atomic_write_u64(&MyProc->waitStart,
get_timeout_start_time(DEADLOCK_TIMEOUT));
} }
else if (log_recovery_conflict_waits) else if (log_recovery_conflict_waits)
{ {
...@@ -1678,6 +1696,7 @@ ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus) ...@@ -1678,6 +1696,7 @@ ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus)
proc->waitLock = NULL; proc->waitLock = NULL;
proc->waitProcLock = NULL; proc->waitProcLock = NULL;
proc->waitStatus = waitStatus; proc->waitStatus = waitStatus;
pg_atomic_write_u64(&MyProc->waitStart, 0);
/* And awaken it */ /* And awaken it */
SetLatch(&proc->procLatch); SetLatch(&proc->procLatch);
......
...@@ -63,7 +63,7 @@ typedef struct ...@@ -63,7 +63,7 @@ typedef struct
} PG_Lock_Status; } PG_Lock_Status;
/* Number of columns in pg_locks output */ /* Number of columns in pg_locks output */
#define NUM_LOCK_STATUS_COLUMNS 15 #define NUM_LOCK_STATUS_COLUMNS 16
/* /*
* VXIDGetDatum - Construct a text representation of a VXID * VXIDGetDatum - Construct a text representation of a VXID
...@@ -142,6 +142,8 @@ pg_lock_status(PG_FUNCTION_ARGS) ...@@ -142,6 +142,8 @@ pg_lock_status(PG_FUNCTION_ARGS)
BOOLOID, -1, 0); BOOLOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath",
BOOLOID, -1, 0); BOOLOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart",
TIMESTAMPTZOID, -1, 0);
funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->tuple_desc = BlessTupleDesc(tupdesc);
...@@ -336,6 +338,10 @@ pg_lock_status(PG_FUNCTION_ARGS) ...@@ -336,6 +338,10 @@ pg_lock_status(PG_FUNCTION_ARGS)
values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode));
values[13] = BoolGetDatum(granted); values[13] = BoolGetDatum(granted);
values[14] = BoolGetDatum(instance->fastpath); values[14] = BoolGetDatum(instance->fastpath);
if (!granted && instance->waitStart != 0)
values[15] = TimestampTzGetDatum(instance->waitStart);
else
nulls[15] = true;
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
result = HeapTupleGetDatum(tuple); result = HeapTupleGetDatum(tuple);
...@@ -406,6 +412,7 @@ pg_lock_status(PG_FUNCTION_ARGS) ...@@ -406,6 +412,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
values[12] = CStringGetTextDatum("SIReadLock"); values[12] = CStringGetTextDatum("SIReadLock");
values[13] = BoolGetDatum(true); values[13] = BoolGetDatum(true);
values[14] = BoolGetDatum(false); values[14] = BoolGetDatum(false);
nulls[15] = true;
tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
result = HeapTupleGetDatum(tuple); result = HeapTupleGetDatum(tuple);
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 202102021 #define CATALOG_VERSION_NO 202102091
#endif #endif
...@@ -5994,9 +5994,9 @@ ...@@ -5994,9 +5994,9 @@
{ oid => '1371', descr => 'view system lock information', { oid => '1371', descr => 'view system lock information',
proname => 'pg_lock_status', prorows => '1000', proretset => 't', proname => 'pg_lock_status', prorows => '1000', proretset => 't',
provolatile => 'v', prorettype => 'record', proargtypes => '', provolatile => 'v', prorettype => 'record', proargtypes => '',
proallargtypes => '{text,oid,oid,int4,int2,text,xid,oid,oid,int2,text,int4,text,bool,bool}', proallargtypes => '{text,oid,oid,int4,int2,text,xid,oid,oid,int2,text,int4,text,bool,bool,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath}', proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath,waitstart}',
prosrc => 'pg_lock_status' }, prosrc => 'pg_lock_status' },
{ oid => '2561', { oid => '2561',
descr => 'get array of PIDs of sessions blocking specified backend PID from acquiring a heavyweight lock', descr => 'get array of PIDs of sessions blocking specified backend PID from acquiring a heavyweight lock',
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "storage/lockdefs.h" #include "storage/lockdefs.h"
#include "storage/lwlock.h" #include "storage/lwlock.h"
#include "storage/shmem.h" #include "storage/shmem.h"
#include "utils/timestamp.h"
/* struct PGPROC is declared in proc.h, but must forward-reference it */ /* struct PGPROC is declared in proc.h, but must forward-reference it */
typedef struct PGPROC PGPROC; typedef struct PGPROC PGPROC;
...@@ -446,6 +447,8 @@ typedef struct LockInstanceData ...@@ -446,6 +447,8 @@ typedef struct LockInstanceData
LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */ LOCKMODE waitLockMode; /* lock awaited by this PGPROC, if any */
BackendId backend; /* backend ID of this PGPROC */ BackendId backend; /* backend ID of this PGPROC */
LocalTransactionId lxid; /* local transaction ID of this PGPROC */ LocalTransactionId lxid; /* local transaction ID of this PGPROC */
TimestampTz waitStart; /* time at which this PGPROC started waiting
* for lock */
int pid; /* pid of this PGPROC */ int pid; /* pid of this PGPROC */
int leaderPid; /* pid of group leader; = pid if no group */ int leaderPid; /* pid of group leader; = pid if no group */
bool fastpath; /* taken via fastpath? */ bool fastpath; /* taken via fastpath? */
......
...@@ -181,6 +181,8 @@ struct PGPROC ...@@ -181,6 +181,8 @@ struct PGPROC
LOCKMODE waitLockMode; /* type of lock we're waiting for */ LOCKMODE waitLockMode; /* type of lock we're waiting for */
LOCKMASK heldLocks; /* bitmask for lock types already held on this LOCKMASK heldLocks; /* bitmask for lock types already held on this
* lock object by this backend */ * lock object by this backend */
pg_atomic_uint64 waitStart; /* time at which wait for lock acquisition
* started */
bool delayChkpt; /* true if this proc delays checkpoint start */ bool delayChkpt; /* true if this proc delays checkpoint start */
......
...@@ -1394,8 +1394,9 @@ pg_locks| SELECT l.locktype, ...@@ -1394,8 +1394,9 @@ pg_locks| SELECT l.locktype,
l.pid, l.pid,
l.mode, l.mode,
l.granted, l.granted,
l.fastpath l.fastpath,
FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath); l.waitstart
FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath, waitstart);
pg_matviews| SELECT n.nspname AS schemaname, pg_matviews| SELECT n.nspname AS schemaname,
c.relname AS matviewname, c.relname AS matviewname,
pg_get_userbyid(c.relowner) AS matviewowner, pg_get_userbyid(c.relowner) AS matviewowner,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment