Commit c6c9474a authored by Thomas Munro

Use condition variables to wait for checkpoints.

Previously we used a polling/sleeping loop to wait for checkpoints
to begin and end.  Each of the two wait loops slept 100 ms between checks,
which led to up to a couple hundred milliseconds of needless
thumb-twiddling.  Use condition variables instead.

Author: Thomas Munro
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/CA%2BhUKGLY7sDe%2Bbg1K%3DbnEzOofGoo4bJHYh9%2BcDCXJepb6DQmLw%40mail.gmail.com
parent 5655565c
...@@ -1281,7 +1281,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser ...@@ -1281,7 +1281,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry>Waiting in an extension.</entry> <entry>Waiting in an extension.</entry>
</row> </row>
<row> <row>
<entry morerows="34"><literal>IPC</literal></entry> <entry morerows="36"><literal>IPC</literal></entry>
<entry><literal>BgWorkerShutdown</literal></entry> <entry><literal>BgWorkerShutdown</literal></entry>
<entry>Waiting for background worker to shut down.</entry> <entry>Waiting for background worker to shut down.</entry>
</row> </row>
...@@ -1293,6 +1293,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser ...@@ -1293,6 +1293,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
<entry><literal>BtreePage</literal></entry> <entry><literal>BtreePage</literal></entry>
<entry>Waiting for the page number needed to continue a parallel B-tree scan to become available.</entry> <entry>Waiting for the page number needed to continue a parallel B-tree scan to become available.</entry>
</row> </row>
<row>
<entry><literal>CheckpointDone</literal></entry>
<entry>Waiting for a checkpoint to complete.</entry>
</row>
<row>
<entry><literal>CheckpointStart</literal></entry>
<entry>Waiting for a checkpoint to start.</entry>
</row>
<row> <row>
<entry><literal>ClogGroupUpdate</literal></entry> <entry><literal>ClogGroupUpdate</literal></entry>
<entry>Waiting for group leader to update transaction status at transaction end.</entry> <entry>Waiting for group leader to update transaction status at transaction end.</entry>
......
...@@ -126,6 +126,9 @@ typedef struct ...@@ -126,6 +126,9 @@ typedef struct
int ckpt_flags; /* checkpoint flags, as defined in xlog.h */ int ckpt_flags; /* checkpoint flags, as defined in xlog.h */
ConditionVariable start_cv; /* signaled when ckpt_started advances */
ConditionVariable done_cv; /* signaled when ckpt_done advances */
uint32 num_backend_writes; /* counts user backend buffer writes */ uint32 num_backend_writes; /* counts user backend buffer writes */
uint32 num_backend_fsync; /* counts user backend fsync calls */ uint32 num_backend_fsync; /* counts user backend fsync calls */
...@@ -428,6 +431,8 @@ CheckpointerMain(void) ...@@ -428,6 +431,8 @@ CheckpointerMain(void)
CheckpointerShmem->ckpt_started++; CheckpointerShmem->ckpt_started++;
SpinLockRelease(&CheckpointerShmem->ckpt_lck); SpinLockRelease(&CheckpointerShmem->ckpt_lck);
ConditionVariableBroadcast(&CheckpointerShmem->start_cv);
/* /*
* The end-of-recovery checkpoint is a real checkpoint that's * The end-of-recovery checkpoint is a real checkpoint that's
* performed while we're still in recovery. * performed while we're still in recovery.
...@@ -488,6 +493,8 @@ CheckpointerMain(void) ...@@ -488,6 +493,8 @@ CheckpointerMain(void)
CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started; CheckpointerShmem->ckpt_done = CheckpointerShmem->ckpt_started;
SpinLockRelease(&CheckpointerShmem->ckpt_lck); SpinLockRelease(&CheckpointerShmem->ckpt_lck);
ConditionVariableBroadcast(&CheckpointerShmem->done_cv);
if (ckpt_performed) if (ckpt_performed)
{ {
/* /*
...@@ -915,6 +922,8 @@ CheckpointerShmemInit(void) ...@@ -915,6 +922,8 @@ CheckpointerShmemInit(void)
MemSet(CheckpointerShmem, 0, size); MemSet(CheckpointerShmem, 0, size);
SpinLockInit(&CheckpointerShmem->ckpt_lck); SpinLockInit(&CheckpointerShmem->ckpt_lck);
CheckpointerShmem->max_requests = NBuffers; CheckpointerShmem->max_requests = NBuffers;
ConditionVariableInit(&CheckpointerShmem->start_cv);
ConditionVariableInit(&CheckpointerShmem->done_cv);
} }
} }
...@@ -1023,6 +1032,7 @@ RequestCheckpoint(int flags) ...@@ -1023,6 +1032,7 @@ RequestCheckpoint(int flags)
new_failed; new_failed;
/* Wait for a new checkpoint to start. */ /* Wait for a new checkpoint to start. */
ConditionVariablePrepareToSleep(&CheckpointerShmem->start_cv);
for (;;) for (;;)
{ {
SpinLockAcquire(&CheckpointerShmem->ckpt_lck); SpinLockAcquire(&CheckpointerShmem->ckpt_lck);
...@@ -1032,13 +1042,15 @@ RequestCheckpoint(int flags) ...@@ -1032,13 +1042,15 @@ RequestCheckpoint(int flags)
if (new_started != old_started) if (new_started != old_started)
break; break;
CHECK_FOR_INTERRUPTS(); ConditionVariableSleep(&CheckpointerShmem->start_cv,
pg_usleep(100000L); WAIT_EVENT_CHECKPOINT_START);
} }
ConditionVariableCancelSleep();
/* /*
* We are waiting for ckpt_done >= new_started, in a modulo sense. * We are waiting for ckpt_done >= new_started, in a modulo sense.
*/ */
ConditionVariablePrepareToSleep(&CheckpointerShmem->done_cv);
for (;;) for (;;)
{ {
int new_done; int new_done;
...@@ -1051,9 +1063,10 @@ RequestCheckpoint(int flags) ...@@ -1051,9 +1063,10 @@ RequestCheckpoint(int flags)
if (new_done - new_started >= 0) if (new_done - new_started >= 0)
break; break;
CHECK_FOR_INTERRUPTS(); ConditionVariableSleep(&CheckpointerShmem->done_cv,
pg_usleep(100000L); WAIT_EVENT_CHECKPOINT_DONE);
} }
ConditionVariableCancelSleep();
if (new_failed != old_failed) if (new_failed != old_failed)
ereport(ERROR, ereport(ERROR,
......
...@@ -3623,6 +3623,12 @@ pgstat_get_wait_ipc(WaitEventIPC w) ...@@ -3623,6 +3623,12 @@ pgstat_get_wait_ipc(WaitEventIPC w)
case WAIT_EVENT_BTREE_PAGE: case WAIT_EVENT_BTREE_PAGE:
event_name = "BtreePage"; event_name = "BtreePage";
break; break;
case WAIT_EVENT_CHECKPOINT_DONE:
event_name = "CheckpointDone";
break;
case WAIT_EVENT_CHECKPOINT_START:
event_name = "CheckpointStart";
break;
case WAIT_EVENT_CLOG_GROUP_UPDATE: case WAIT_EVENT_CLOG_GROUP_UPDATE:
event_name = "ClogGroupUpdate"; event_name = "ClogGroupUpdate";
break; break;
......
...@@ -817,6 +817,8 @@ typedef enum ...@@ -817,6 +817,8 @@ typedef enum
WAIT_EVENT_BGWORKER_STARTUP, WAIT_EVENT_BGWORKER_STARTUP,
WAIT_EVENT_BTREE_PAGE, WAIT_EVENT_BTREE_PAGE,
WAIT_EVENT_CLOG_GROUP_UPDATE, WAIT_EVENT_CLOG_GROUP_UPDATE,
WAIT_EVENT_CHECKPOINT_DONE,
WAIT_EVENT_CHECKPOINT_START,
WAIT_EVENT_EXECUTE_GATHER, WAIT_EVENT_EXECUTE_GATHER,
WAIT_EVENT_HASH_BATCH_ALLOCATING, WAIT_EVENT_HASH_BATCH_ALLOCATING,
WAIT_EVENT_HASH_BATCH_ELECTING, WAIT_EVENT_HASH_BATCH_ELECTING,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment