Wake up for latches in CheckpointWriteDelay().

The checkpointer shouldn't ignore its latch. Other backends may be waiting for it to drain the request queue. Hopefully real systems don't have a full queue often, but the condition is reached easily when shared_buffers is small. This involves defining a new wait event, which will appear in the pg_stat_activity view often due to spread checkpoints. Back-patch only to 14. Even though the problem exists in earlier branches too, it's hard to hit there. In 14 we stopped using signal handlers for latches on Linux, *BSD and macOS, which were previously hiding this problem by interrupting the sleep (though not reliably, as the signal could arrive before the sleep begins; precisely the problem latches address). Reported-by: Andres Freund <andres@anarazel.de> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de

Wake up for latches in CheckpointWriteDelay().
The checkpointer shouldn't ignore its latch. Other backends may be waiting for it to drain the request queue. Hopefully real systems don't have a full queue often, but the condition is reached easily when shared_buffers is small. This involves defining a new wait event, which will appear in the pg_stat_activity view often due to spread checkpoints. Back-patch only to 14. Even though the problem exists in earlier branches too, it's hard to hit there. In 14 we stopped using signal handlers for latches on Linux, *BSD and macOS, which were previously hiding this problem by interrupting the sleep (though not reliably, as the signal could arrive before the sleep begins; precisely the problem latches address). Reported-by: Andres Freund <andres@anarazel.de> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20220226213942.nb7uvb2pamyu26dj%40alap3.anarazel.de
78c0f85e · Thomas Munro · d9f7ad54 · 78c0f85e · 78c0f85e · 78c0f85e
Commit 78c0f85e authored Mar 16, 2022 by Thomas Munro
4 changed files
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2223,6 +2223,10 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
      <entry><literal>BaseBackupThrottle</literal></entry>
      <entry>Waiting during base backup when throttling activity.</entry>
     </row>
+     <row>
+      <entry><literal>CheckpointerWriteDelay</literal></entry>
+      <entry>Waiting between writes while performing a checkpoint.</entry>
+     </row>
     <row>
      <entry><literal>PgSleep</literal></entry>
      <entry>Waiting due to a call to <function>pg_sleep</function> or

--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -490,6 +490,9 @@ CheckpointerMain(void)
 			}
 			ckpt_active = false;
+			/* We may have received an interrupt during the checkpoint. */
+			HandleCheckpointerInterrupts();
 		}
 		/* Check for archive_timeout and switch xlog files if necessary. */
@@ -732,7 +735,10 @@ CheckpointWriteDelay(int flags, double progress)
 		 * Checkpointer and bgwriter are no longer related so take the Big
 		 * Sleep.
 		 */
-		pg_usleep(100000L);
+		WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT,
+				  100,
+				  WAIT_EVENT_CHECKPOINT_WRITE_DELAY);
+		ResetLatch(MyLatch);
 	}
 	else if (--absorb_counter <= 0)
 	{

--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -473,6 +473,9 @@ pgstat_get_wait_timeout(WaitEventTimeout w)
 		case WAIT_EVENT_BASE_BACKUP_THROTTLE:
 			event_name = "BaseBackupThrottle";
 			break;
+		case WAIT_EVENT_CHECKPOINT_WRITE_DELAY:
+			event_name = "CheckpointWriteDelay";
+			break;
 		case WAIT_EVENT_PG_SLEEP:
 			event_name = "PgSleep";
 			break;

--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -140,7 +140,8 @@ typedef enum
 	WAIT_EVENT_PG_SLEEP,
 	WAIT_EVENT_RECOVERY_APPLY_DELAY,
 	WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL,
-	WAIT_EVENT_VACUUM_DELAY
+	WAIT_EVENT_VACUUM_DELAY,
+	WAIT_EVENT_CHECKPOINT_WRITE_DELAY
 } WaitEventTimeout;
 /* ----------