Commit 4de82f7d authored by Simon Riggs

Wakeup WALWriter as needed for asynchronous commit performance.

Previously we waited for wal_writer_delay before flushing WAL. Now
we also wake WALWriter as soon as a WAL buffer page has filled.
Robert Haas observed a significant effect on the performance of
asynchronous commits, attributed to the ability to set hint bits on
tuples earlier, thereby reducing contention caused by clog lookups.
parent 02d88efe
...@@ -432,6 +432,11 @@ typedef struct XLogCtlData ...@@ -432,6 +432,11 @@ typedef struct XLogCtlData
*/ */
Latch recoveryWakeupLatch; Latch recoveryWakeupLatch;
/*
* WALWriterLatch is used to wake up the WALWriter to write some WAL.
*/
Latch WALWriterLatch;
/* /*
* During recovery, we keep a copy of the latest checkpoint record here. * During recovery, we keep a copy of the latest checkpoint record here.
* Used by the background writer when it wants to create a restartpoint. * Used by the background writer when it wants to create a restartpoint.
...@@ -1916,19 +1921,35 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch) ...@@ -1916,19 +1921,35 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
} }
/* /*
* Record the LSN for an asynchronous transaction commit/abort. * Record the LSN for an asynchronous transaction commit/abort
* and nudge the WALWriter if there is a complete page to write.
* (This should not be called for synchronous commits.) * (This should not be called for synchronous commits.)
*/ */
void void
XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN) XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
{ {
XLogRecPtr WriteRqstPtr = asyncXactLSN;
/* use volatile pointer to prevent code rearrangement */ /* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl; volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck); SpinLockAcquire(&xlogctl->info_lck);
LogwrtResult = xlogctl->LogwrtResult;
if (XLByteLT(xlogctl->asyncXactLSN, asyncXactLSN)) if (XLByteLT(xlogctl->asyncXactLSN, asyncXactLSN))
xlogctl->asyncXactLSN = asyncXactLSN; xlogctl->asyncXactLSN = asyncXactLSN;
SpinLockRelease(&xlogctl->info_lck); SpinLockRelease(&xlogctl->info_lck);
/* back off to last completed page boundary */
WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
/* if we have already flushed that far, we're done */
if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
return;
/*
* Nudge the WALWriter if we have a full page of WAL to write.
*/
SetLatch(&XLogCtl->WALWriterLatch);
} }
/* /*
...@@ -5072,6 +5093,7 @@ XLOGShmemInit(void) ...@@ -5072,6 +5093,7 @@ XLOGShmemInit(void)
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages); XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
SpinLockInit(&XLogCtl->info_lck); SpinLockInit(&XLogCtl->info_lck);
InitSharedLatch(&XLogCtl->recoveryWakeupLatch); InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
InitSharedLatch(&XLogCtl->WALWriterLatch);
/* /*
* If we are not in bootstrap mode, pg_control should already exist. Read * If we are not in bootstrap mode, pg_control should already exist. Read
...@@ -10013,3 +10035,12 @@ WakeupRecovery(void) ...@@ -10013,3 +10035,12 @@ WakeupRecovery(void)
{ {
SetLatch(&XLogCtl->recoveryWakeupLatch); SetLatch(&XLogCtl->recoveryWakeupLatch);
} }
/*
* Return a pointer to the WALWriterLatch held in XLogCtl, for callers that
* need to initialize, reset, wait on, or set it.
*/
Latch *
WALWriterLatch(void)
{
return &XLogCtl->WALWriterLatch;
}
...@@ -11,7 +11,8 @@ ...@@ -11,7 +11,8 @@
* *
* Note that as with the bgwriter for shared buffers, regular backends are * Note that as with the bgwriter for shared buffers, regular backends are
* still empowered to issue WAL writes and fsyncs when the walwriter doesn't * still empowered to issue WAL writes and fsyncs when the walwriter doesn't
* keep up. * keep up. This means that the WALWriter is not an essential process and
* can shut down quickly when requested.
* *
* Because the walwriter's cycle is directly linked to the maximum delay * Because the walwriter's cycle is directly linked to the maximum delay
* before async-commit transactions are guaranteed committed, it's probably * before async-commit transactions are guaranteed committed, it's probably
...@@ -76,7 +77,6 @@ static void wal_quickdie(SIGNAL_ARGS); ...@@ -76,7 +77,6 @@ static void wal_quickdie(SIGNAL_ARGS);
static void WalSigHupHandler(SIGNAL_ARGS); static void WalSigHupHandler(SIGNAL_ARGS);
static void WalShutdownHandler(SIGNAL_ARGS); static void WalShutdownHandler(SIGNAL_ARGS);
/* /*
* Main entry point for walwriter process * Main entry point for walwriter process
* *
...@@ -89,6 +89,8 @@ WalWriterMain(void) ...@@ -89,6 +89,8 @@ WalWriterMain(void)
sigjmp_buf local_sigjmp_buf; sigjmp_buf local_sigjmp_buf;
MemoryContext walwriter_context; MemoryContext walwriter_context;
InitLatch(WALWriterLatch()); /* initialize latch used in main loop */
/* /*
* If possible, make this process a group leader, so that the postmaster * If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (walwriter probably never has any * can signal any child processes too. (walwriter probably never has any
...@@ -220,7 +222,7 @@ WalWriterMain(void) ...@@ -220,7 +222,7 @@ WalWriterMain(void)
*/ */
for (;;) for (;;)
{ {
long udelay; ResetLatch(WALWriterLatch());
/* /*
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
...@@ -248,20 +250,9 @@ WalWriterMain(void) ...@@ -248,20 +250,9 @@ WalWriterMain(void)
*/ */
XLogBackgroundFlush(); XLogBackgroundFlush();
/* (void) WaitLatch(WALWriterLatch(),
* Delay until time to do something more, but fall out of delay WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
* reasonably quickly if signaled. WalWriterDelay /* ms */);
*/
udelay = WalWriterDelay * 1000L;
while (udelay > 999999L)
{
if (got_SIGHUP || shutdown_requested)
break;
pg_usleep(1000000L);
udelay -= 1000000L;
}
if (!(got_SIGHUP || shutdown_requested))
pg_usleep(udelay);
} }
} }
...@@ -308,6 +299,7 @@ static void ...@@ -308,6 +299,7 @@ static void
WalSigHupHandler(SIGNAL_ARGS) WalSigHupHandler(SIGNAL_ARGS)
{ {
got_SIGHUP = true; got_SIGHUP = true;
SetLatch(WALWriterLatch());
} }
/* SIGTERM: set flag to exit normally */ /* SIGTERM: set flag to exit normally */
...@@ -315,4 +307,5 @@ static void ...@@ -315,4 +307,5 @@ static void
WalShutdownHandler(SIGNAL_ARGS) WalShutdownHandler(SIGNAL_ARGS)
{ {
shutdown_requested = true; shutdown_requested = true;
SetLatch(WALWriterLatch());
} }
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "datatype/timestamp.h" #include "datatype/timestamp.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/latch.h"
#include "utils/pg_crc.h" #include "utils/pg_crc.h"
/* /*
...@@ -319,6 +320,7 @@ extern TimeLineID GetRecoveryTargetTLI(void); ...@@ -319,6 +320,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
extern bool CheckPromoteSignal(void); extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void); extern void WakeupRecovery(void);
extern Latch *WALWriterLatch(void);
/* /*
* Starting/stopping a base backup * Starting/stopping a base backup
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment