Commit 806a2aee authored by Simon Riggs

Split work of bgwriter between 2 processes: bgwriter and checkpointer.

bgwriter is now a much less important process, responsible for page
cleaning duties only. checkpointer is now responsible for checkpoints
and so has a key role in shutdown. Later patches will correct doc
references to the now old idea that bgwriter performs checkpoints.
This has a beneficial effect on performance at high write rates, but it is
mainly a refactoring to more easily allow changes for power reduction, by
simplifying the previously tortuous code that was required to allow page
cleaning and checkpointing to time-slice in the same process.

Patch by me, Review by Dickson Guedes
parent 589adb86
...@@ -315,6 +315,9 @@ AuxiliaryProcessMain(int argc, char *argv[]) ...@@ -315,6 +315,9 @@ AuxiliaryProcessMain(int argc, char *argv[])
case BgWriterProcess: case BgWriterProcess:
statmsg = "writer process"; statmsg = "writer process";
break; break;
case CheckpointerProcess:
statmsg = "checkpointer process";
break;
case WalWriterProcess: case WalWriterProcess:
statmsg = "wal writer process"; statmsg = "wal writer process";
break; break;
...@@ -415,6 +418,11 @@ AuxiliaryProcessMain(int argc, char *argv[]) ...@@ -415,6 +418,11 @@ AuxiliaryProcessMain(int argc, char *argv[])
BackgroundWriterMain(); BackgroundWriterMain();
proc_exit(1); /* should never return */ proc_exit(1); /* should never return */
case CheckpointerProcess:
/* don't set signals, checkpointer has its own agenda */
CheckpointerMain();
proc_exit(1); /* should never return */
case WalWriterProcess: case WalWriterProcess:
/* don't set signals, walwriter has its own agenda */ /* don't set signals, walwriter has its own agenda */
InitXLOGAccess(); InitXLOGAccess();
......
...@@ -13,6 +13,6 @@ top_builddir = ../../.. ...@@ -13,6 +13,6 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \ OBJS = autovacuum.o bgwriter.o fork_process.o pgarch.o pgstat.o postmaster.o \
syslogger.o walwriter.o syslogger.o walwriter.o checkpointer.o
include $(top_srcdir)/src/backend/common.mk include $(top_srcdir)/src/backend/common.mk
This diff is collapsed.
...@@ -208,6 +208,7 @@ char *output_config_variable = NULL; ...@@ -208,6 +208,7 @@ char *output_config_variable = NULL;
/* PIDs of special child processes; 0 when not running */ /* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0, static pid_t StartupPID = 0,
BgWriterPID = 0, BgWriterPID = 0,
CheckpointerPID = 0,
WalWriterPID = 0, WalWriterPID = 0,
WalReceiverPID = 0, WalReceiverPID = 0,
AutoVacPID = 0, AutoVacPID = 0,
...@@ -279,7 +280,7 @@ typedef enum ...@@ -279,7 +280,7 @@ typedef enum
PM_WAIT_BACKUP, /* waiting for online backup mode to end */ PM_WAIT_BACKUP, /* waiting for online backup mode to end */
PM_WAIT_READONLY, /* waiting for read only backends to exit */ PM_WAIT_READONLY, /* waiting for read only backends to exit */
PM_WAIT_BACKENDS, /* waiting for live backends to exit */ PM_WAIT_BACKENDS, /* waiting for live backends to exit */
PM_SHUTDOWN, /* waiting for bgwriter to do shutdown ckpt */ PM_SHUTDOWN, /* waiting for checkpointer to do shutdown ckpt */
PM_SHUTDOWN_2, /* waiting for archiver and walsenders to PM_SHUTDOWN_2, /* waiting for archiver and walsenders to
* finish */ * finish */
PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */ PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
...@@ -465,6 +466,7 @@ static void ShmemBackendArrayRemove(Backend *bn); ...@@ -465,6 +466,7 @@ static void ShmemBackendArrayRemove(Backend *bn);
#define StartupDataBase() StartChildProcess(StartupProcess) #define StartupDataBase() StartChildProcess(StartupProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess) #define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
#define StartWalWriter() StartChildProcess(WalWriterProcess) #define StartWalWriter() StartChildProcess(WalWriterProcess)
#define StartWalReceiver() StartChildProcess(WalReceiverProcess) #define StartWalReceiver() StartChildProcess(WalReceiverProcess)
...@@ -1028,8 +1030,8 @@ PostmasterMain(int argc, char *argv[]) ...@@ -1028,8 +1030,8 @@ PostmasterMain(int argc, char *argv[])
* CAUTION: when changing this list, check for side-effects on the signal * CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c, * handling setup of child processes. See tcop/postgres.c,
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c, * bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/walwriter.c,
* postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c, and * postmaster/autovacuum.c, postmaster/pgarch.c, postmaster/pgstat.c,
* postmaster/syslogger.c. * postmaster/syslogger.c and postmaster/checkpointer.c
*/ */
pqinitmask(); pqinitmask();
PG_SETMASK(&BlockSig); PG_SETMASK(&BlockSig);
...@@ -1366,10 +1368,14 @@ ServerLoop(void) ...@@ -1366,10 +1368,14 @@ ServerLoop(void)
* state that prevents it, start one. It doesn't matter if this * state that prevents it, start one. It doesn't matter if this
* fails, we'll just try again later. * fails, we'll just try again later.
*/ */
if (BgWriterPID == 0 && if (pmState == PM_RUN || pmState == PM_RECOVERY ||
(pmState == PM_RUN || pmState == PM_RECOVERY || pmState == PM_HOT_STANDBY)
pmState == PM_HOT_STANDBY)) {
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter(); BgWriterPID = StartBackgroundWriter();
if (CheckpointerPID == 0)
CheckpointerPID = StartCheckpointer();
}
/* /*
* Likewise, if we have lost the walwriter process, try to start a new * Likewise, if we have lost the walwriter process, try to start a new
...@@ -2047,6 +2053,8 @@ SIGHUP_handler(SIGNAL_ARGS) ...@@ -2047,6 +2053,8 @@ SIGHUP_handler(SIGNAL_ARGS)
signal_child(StartupPID, SIGHUP); signal_child(StartupPID, SIGHUP);
if (BgWriterPID != 0) if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGHUP); signal_child(BgWriterPID, SIGHUP);
if (CheckpointerPID != 0)
signal_child(CheckpointerPID, SIGHUP);
if (WalWriterPID != 0) if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGHUP); signal_child(WalWriterPID, SIGHUP);
if (WalReceiverPID != 0) if (WalReceiverPID != 0)
...@@ -2119,6 +2127,8 @@ pmdie(SIGNAL_ARGS) ...@@ -2119,6 +2127,8 @@ pmdie(SIGNAL_ARGS)
/* and the walwriter too */ /* and the walwriter too */
if (WalWriterPID != 0) if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGTERM); signal_child(WalWriterPID, SIGTERM);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGTERM);
/* /*
* If we're in recovery, we can't kill the startup process * If we're in recovery, we can't kill the startup process
...@@ -2159,9 +2169,11 @@ pmdie(SIGNAL_ARGS) ...@@ -2159,9 +2169,11 @@ pmdie(SIGNAL_ARGS)
signal_child(StartupPID, SIGTERM); signal_child(StartupPID, SIGTERM);
if (WalReceiverPID != 0) if (WalReceiverPID != 0)
signal_child(WalReceiverPID, SIGTERM); signal_child(WalReceiverPID, SIGTERM);
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGTERM);
if (pmState == PM_RECOVERY) if (pmState == PM_RECOVERY)
{ {
/* only bgwriter is active in this state */ /* only checkpointer is active in this state */
pmState = PM_WAIT_BACKENDS; pmState = PM_WAIT_BACKENDS;
} }
else if (pmState == PM_RUN || else if (pmState == PM_RUN ||
...@@ -2206,6 +2218,8 @@ pmdie(SIGNAL_ARGS) ...@@ -2206,6 +2218,8 @@ pmdie(SIGNAL_ARGS)
signal_child(StartupPID, SIGQUIT); signal_child(StartupPID, SIGQUIT);
if (BgWriterPID != 0) if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGQUIT); signal_child(BgWriterPID, SIGQUIT);
if (CheckpointerPID != 0)
signal_child(CheckpointerPID, SIGQUIT);
if (WalWriterPID != 0) if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGQUIT); signal_child(WalWriterPID, SIGQUIT);
if (WalReceiverPID != 0) if (WalReceiverPID != 0)
...@@ -2336,12 +2350,14 @@ reaper(SIGNAL_ARGS) ...@@ -2336,12 +2350,14 @@ reaper(SIGNAL_ARGS)
} }
/* /*
* Crank up the background writer, if we didn't do that already * Crank up background tasks, if we didn't do that already
* when we entered consistent recovery state. It doesn't matter * when we entered consistent recovery state. It doesn't matter
* if this fails, we'll just try again later. * if this fails, we'll just try again later.
*/ */
if (BgWriterPID == 0) if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter(); BgWriterPID = StartBackgroundWriter();
if (CheckpointerPID == 0)
CheckpointerPID = StartCheckpointer();
/* /*
* Likewise, start other special children as needed. In a restart * Likewise, start other special children as needed. In a restart
...@@ -2369,10 +2385,22 @@ reaper(SIGNAL_ARGS) ...@@ -2369,10 +2385,22 @@ reaper(SIGNAL_ARGS)
if (pid == BgWriterPID) if (pid == BgWriterPID)
{ {
BgWriterPID = 0; BgWriterPID = 0;
if (!EXIT_STATUS_0(exitstatus))
HandleChildCrash(pid, exitstatus,
_("background writer process"));
continue;
}
/*
* Was it the checkpointer?
*/
if (pid == CheckpointerPID)
{
CheckpointerPID = 0;
if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN) if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
{ {
/* /*
* OK, we saw normal exit of the bgwriter after it's been told * OK, we saw normal exit of the checkpointer after it's been told
* to shut down. We expect that it wrote a shutdown * to shut down. We expect that it wrote a shutdown
* checkpoint. (If for some reason it didn't, recovery will * checkpoint. (If for some reason it didn't, recovery will
* occur on next postmaster start.) * occur on next postmaster start.)
...@@ -2409,11 +2437,11 @@ reaper(SIGNAL_ARGS) ...@@ -2409,11 +2437,11 @@ reaper(SIGNAL_ARGS)
else else
{ {
/* /*
* Any unexpected exit of the bgwriter (including FATAL exit) * Any unexpected exit of the checkpointer (including FATAL exit)
* is treated as a crash. * is treated as a crash.
*/ */
HandleChildCrash(pid, exitstatus, HandleChildCrash(pid, exitstatus,
_("background writer process")); _("checkpointer process"));
} }
continue; continue;
...@@ -2597,8 +2625,8 @@ CleanupBackend(int pid, ...@@ -2597,8 +2625,8 @@ CleanupBackend(int pid,
} }
/* /*
* HandleChildCrash -- cleanup after failed backend, bgwriter, walwriter, * HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
* or autovacuum. * walwriter or autovacuum.
* *
* The objectives here are to clean up our local state about the child * The objectives here are to clean up our local state about the child
* process, and to signal all other remaining children to quickdie. * process, and to signal all other remaining children to quickdie.
...@@ -2691,6 +2719,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) ...@@ -2691,6 +2719,18 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT)); signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
} }
/* Take care of the checkpointer too */
if (pid == CheckpointerPID)
CheckpointerPID = 0;
else if (CheckpointerPID != 0 && !FatalError)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
(SendStop ? "SIGSTOP" : "SIGQUIT"),
(int) CheckpointerPID)));
signal_child(CheckpointerPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Take care of the walwriter too */ /* Take care of the walwriter too */
if (pid == WalWriterPID) if (pid == WalWriterPID)
WalWriterPID = 0; WalWriterPID = 0;
...@@ -2887,9 +2927,10 @@ PostmasterStateMachine(void) ...@@ -2887,9 +2927,10 @@ PostmasterStateMachine(void)
{ {
/* /*
* PM_WAIT_BACKENDS state ends when we have no regular backends * PM_WAIT_BACKENDS state ends when we have no regular backends
* (including autovac workers) and no walwriter or autovac launcher. * (including autovac workers) and no walwriter, autovac launcher
* If we are doing crash recovery then we expect the bgwriter to exit * or bgwriter. If we are doing crash recovery then we expect the
* too, otherwise not. The archiver, stats, and syslogger processes * checkpointer to exit as well, otherwise not.
* The archiver, stats, and syslogger processes
* are disregarded since they are not connected to shared memory; we * are disregarded since they are not connected to shared memory; we
* also disregard dead_end children here. Walsenders are also * also disregard dead_end children here. Walsenders are also
* disregarded, they will be terminated later after writing the * disregarded, they will be terminated later after writing the
...@@ -2898,7 +2939,8 @@ PostmasterStateMachine(void) ...@@ -2898,7 +2939,8 @@ PostmasterStateMachine(void)
if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 && if (CountChildren(BACKEND_TYPE_NORMAL | BACKEND_TYPE_AUTOVAC) == 0 &&
StartupPID == 0 && StartupPID == 0 &&
WalReceiverPID == 0 && WalReceiverPID == 0 &&
(BgWriterPID == 0 || !FatalError) && BgWriterPID == 0 &&
(CheckpointerPID == 0 || !FatalError) &&
WalWriterPID == 0 && WalWriterPID == 0 &&
AutoVacPID == 0) AutoVacPID == 0)
{ {
...@@ -2920,22 +2962,22 @@ PostmasterStateMachine(void) ...@@ -2920,22 +2962,22 @@ PostmasterStateMachine(void)
/* /*
* If we get here, we are proceeding with normal shutdown. All * If we get here, we are proceeding with normal shutdown. All
* the regular children are gone, and it's time to tell the * the regular children are gone, and it's time to tell the
* bgwriter to do a shutdown checkpoint. * checkpointer to do a shutdown checkpoint.
*/ */
Assert(Shutdown > NoShutdown); Assert(Shutdown > NoShutdown);
/* Start the bgwriter if not running */ /* Start the checkpointer if not running */
if (BgWriterPID == 0) if (CheckpointerPID == 0)
BgWriterPID = StartBackgroundWriter(); CheckpointerPID = StartCheckpointer();
/* And tell it to shut down */ /* And tell it to shut down */
if (BgWriterPID != 0) if (CheckpointerPID != 0)
{ {
signal_child(BgWriterPID, SIGUSR2); signal_child(CheckpointerPID, SIGUSR2);
pmState = PM_SHUTDOWN; pmState = PM_SHUTDOWN;
} }
else else
{ {
/* /*
* If we failed to fork a bgwriter, just shut down. Any * If we failed to fork a checkpointer, just shut down. Any
* required cleanup will happen at next restart. We set * required cleanup will happen at next restart. We set
* FatalError so that an "abnormal shutdown" message gets * FatalError so that an "abnormal shutdown" message gets
* logged when we exit. * logged when we exit.
...@@ -2994,6 +3036,7 @@ PostmasterStateMachine(void) ...@@ -2994,6 +3036,7 @@ PostmasterStateMachine(void)
Assert(StartupPID == 0); Assert(StartupPID == 0);
Assert(WalReceiverPID == 0); Assert(WalReceiverPID == 0);
Assert(BgWriterPID == 0); Assert(BgWriterPID == 0);
Assert(CheckpointerPID == 0);
Assert(WalWriterPID == 0); Assert(WalWriterPID == 0);
Assert(AutoVacPID == 0); Assert(AutoVacPID == 0);
/* syslogger is not considered here */ /* syslogger is not considered here */
...@@ -4173,6 +4216,8 @@ sigusr1_handler(SIGNAL_ARGS) ...@@ -4173,6 +4216,8 @@ sigusr1_handler(SIGNAL_ARGS)
*/ */
Assert(BgWriterPID == 0); Assert(BgWriterPID == 0);
BgWriterPID = StartBackgroundWriter(); BgWriterPID = StartBackgroundWriter();
Assert(CheckpointerPID == 0);
CheckpointerPID = StartCheckpointer();
pmState = PM_RECOVERY; pmState = PM_RECOVERY;
} }
...@@ -4459,6 +4504,10 @@ StartChildProcess(AuxProcType type) ...@@ -4459,6 +4504,10 @@ StartChildProcess(AuxProcType type)
ereport(LOG, ereport(LOG,
(errmsg("could not fork background writer process: %m"))); (errmsg("could not fork background writer process: %m")));
break; break;
case CheckpointerProcess:
ereport(LOG,
(errmsg("could not fork checkpointer process: %m")));
break;
case WalWriterProcess: case WalWriterProcess:
ereport(LOG, ereport(LOG,
(errmsg("could not fork WAL writer process: %m"))); (errmsg("could not fork WAL writer process: %m")));
......
...@@ -1278,11 +1278,9 @@ BufferSync(int flags) ...@@ -1278,11 +1278,9 @@ BufferSync(int flags)
break; break;
/* /*
* Perform normal bgwriter duties and sleep to throttle our * Sleep to throttle our I/O rate.
* I/O rate.
*/ */
CheckpointWriteDelay(flags, CheckpointWriteDelay(flags, (double) num_written / num_to_write);
(double) num_written / num_to_write);
} }
} }
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
/* /*
* Special values for the segno arg to RememberFsyncRequest. * Special values for the segno arg to RememberFsyncRequest.
* *
* Note that CompactBgwriterRequestQueue assumes that it's OK to remove an * Note that CompactCheckpointerRequestQueue assumes that it's OK to remove an
* fsync request from the queue if an identical, subsequent request is found. * fsync request from the queue if an identical, subsequent request is found.
* See comments there before making changes here. * See comments there before making changes here.
*/ */
...@@ -77,7 +77,7 @@ ...@@ -77,7 +77,7 @@
* Inactive segments are those that once contained data but are currently * Inactive segments are those that once contained data but are currently
* not needed because of an mdtruncate() operation. The reason for leaving * not needed because of an mdtruncate() operation. The reason for leaving
* them present at size zero, rather than unlinking them, is that other * them present at size zero, rather than unlinking them, is that other
* backends and/or the bgwriter might be holding open file references to * backends and/or the checkpointer might be holding open file references to
* such segments. If the relation expands again after mdtruncate(), such * such segments. If the relation expands again after mdtruncate(), such
* that a deactivated segment becomes active again, it is important that * that a deactivated segment becomes active again, it is important that
* such file references still be valid --- else data might get written * such file references still be valid --- else data might get written
...@@ -111,7 +111,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ ...@@ -111,7 +111,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
/* /*
* In some contexts (currently, standalone backends and the bgwriter process) * In some contexts (currently, standalone backends and the checkpointer process)
* we keep track of pending fsync operations: we need to remember all relation * we keep track of pending fsync operations: we need to remember all relation
* segments that have been written since the last checkpoint, so that we can * segments that have been written since the last checkpoint, so that we can
* fsync them down to disk before completing the next checkpoint. This hash * fsync them down to disk before completing the next checkpoint. This hash
...@@ -123,7 +123,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ ...@@ -123,7 +123,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
* a hash table, because we don't expect there to be any duplicate requests. * a hash table, because we don't expect there to be any duplicate requests.
* *
* (Regular backends do not track pending operations locally, but forward * (Regular backends do not track pending operations locally, but forward
* them to the bgwriter.) * them to the checkpointer.)
*/ */
typedef struct typedef struct
{ {
...@@ -194,7 +194,7 @@ mdinit(void) ...@@ -194,7 +194,7 @@ mdinit(void)
* Create pending-operations hashtable if we need it. Currently, we need * Create pending-operations hashtable if we need it. Currently, we need
* it if we are standalone (not under a postmaster) OR if we are a * it if we are standalone (not under a postmaster) OR if we are a
* bootstrap-mode subprocess of a postmaster (that is, a startup or * bootstrap-mode subprocess of a postmaster (that is, a startup or
* bgwriter process). * checkpointer process).
*/ */
if (!IsUnderPostmaster || IsBootstrapProcessingMode()) if (!IsUnderPostmaster || IsBootstrapProcessingMode())
{ {
...@@ -214,10 +214,10 @@ mdinit(void) ...@@ -214,10 +214,10 @@ mdinit(void)
} }
/* /*
* In archive recovery, we rely on bgwriter to do fsyncs, but we will have * In archive recovery, we rely on checkpointer to do fsyncs, but we will have
* already created the pendingOpsTable during initialization of the startup * already created the pendingOpsTable during initialization of the startup
* process. Calling this function drops the local pendingOpsTable so that * process. Calling this function drops the local pendingOpsTable so that
* subsequent requests will be forwarded to bgwriter. * subsequent requests will be forwarded to checkpointer.
*/ */
void void
SetForwardFsyncRequests(void) SetForwardFsyncRequests(void)
...@@ -765,9 +765,9 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) ...@@ -765,9 +765,9 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
* NOTE: this assumption could only be wrong if another backend has * NOTE: this assumption could only be wrong if another backend has
* truncated the relation. We rely on higher code levels to handle that * truncated the relation. We rely on higher code levels to handle that
* scenario by closing and re-opening the md fd, which is handled via * scenario by closing and re-opening the md fd, which is handled via
* relcache flush. (Since the bgwriter doesn't participate in relcache * relcache flush. (Since the checkpointer doesn't participate in relcache
* flush, it could have segment chain entries for inactive segments; * flush, it could have segment chain entries for inactive segments;
* that's OK because the bgwriter never needs to compute relation size.) * that's OK because the checkpointer never needs to compute relation size.)
*/ */
while (v->mdfd_chain != NULL) while (v->mdfd_chain != NULL)
{ {
...@@ -957,7 +957,7 @@ mdsync(void) ...@@ -957,7 +957,7 @@ mdsync(void)
elog(ERROR, "cannot sync without a pendingOpsTable"); elog(ERROR, "cannot sync without a pendingOpsTable");
/* /*
* If we are in the bgwriter, the sync had better include all fsync * If we are in the checkpointer, the sync had better include all fsync
* requests that were queued by backends up to this point. The tightest * requests that were queued by backends up to this point. The tightest
* race condition that could occur is that a buffer that must be written * race condition that could occur is that a buffer that must be written
* and fsync'd for the checkpoint could have been dumped by a backend just * and fsync'd for the checkpoint could have been dumped by a backend just
...@@ -1033,7 +1033,7 @@ mdsync(void) ...@@ -1033,7 +1033,7 @@ mdsync(void)
int failures; int failures;
/* /*
* If in bgwriter, we want to absorb pending requests every so * If in checkpointer, we want to absorb pending requests every so
* often to prevent overflow of the fsync request queue. It is * often to prevent overflow of the fsync request queue. It is
* unspecified whether newly-added entries will be visited by * unspecified whether newly-added entries will be visited by
* hash_seq_search, but we don't care since we don't need to * hash_seq_search, but we don't care since we don't need to
...@@ -1070,9 +1070,9 @@ mdsync(void) ...@@ -1070,9 +1070,9 @@ mdsync(void)
* say "but an unreferenced SMgrRelation is still a leak!" Not * say "but an unreferenced SMgrRelation is still a leak!" Not
* really, because the only case in which a checkpoint is done * really, because the only case in which a checkpoint is done
* by a process that isn't about to shut down is in the * by a process that isn't about to shut down is in the
* bgwriter, and it will periodically do smgrcloseall(). This * checkpointer, and it will periodically do smgrcloseall(). This
* fact justifies our not closing the reln in the success path * fact justifies our not closing the reln in the success path
* either, which is a good thing since in non-bgwriter cases * either, which is a good thing since in non-checkpointer cases
* we couldn't safely do that.) Furthermore, in many cases * we couldn't safely do that.) Furthermore, in many cases
* the relation will have been dirtied through this same smgr * the relation will have been dirtied through this same smgr
* relation, and so we can save a file open/close cycle. * relation, and so we can save a file open/close cycle.
...@@ -1301,7 +1301,7 @@ register_unlink(RelFileNodeBackend rnode) ...@@ -1301,7 +1301,7 @@ register_unlink(RelFileNodeBackend rnode)
else else
{ {
/* /*
* Notify the bgwriter about it. If we fail to queue the request * Notify the checkpointer about it. If we fail to queue the request
* message, we have to sleep and try again, because we can't simply * message, we have to sleep and try again, because we can't simply
* delete the file now. Ugly, but hopefully won't happen often. * delete the file now. Ugly, but hopefully won't happen often.
* *
...@@ -1315,10 +1315,10 @@ register_unlink(RelFileNodeBackend rnode) ...@@ -1315,10 +1315,10 @@ register_unlink(RelFileNodeBackend rnode)
} }
/* /*
* RememberFsyncRequest() -- callback from bgwriter side of fsync request * RememberFsyncRequest() -- callback from checkpointer side of fsync request
* *
* We stuff most fsync requests into the local hash table for execution * We stuff most fsync requests into the local hash table for execution
* during the bgwriter's next checkpoint. UNLINK requests go into a * during the checkpointer's next checkpoint. UNLINK requests go into a
* separate linked list, however, because they get processed separately. * separate linked list, however, because they get processed separately.
* *
* The range of possible segment numbers is way less than the range of * The range of possible segment numbers is way less than the range of
...@@ -1460,20 +1460,20 @@ ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum) ...@@ -1460,20 +1460,20 @@ ForgetRelationFsyncRequests(RelFileNodeBackend rnode, ForkNumber forknum)
else if (IsUnderPostmaster) else if (IsUnderPostmaster)
{ {
/* /*
* Notify the bgwriter about it. If we fail to queue the revoke * Notify the checkpointer about it. If we fail to queue the revoke
* message, we have to sleep and try again ... ugly, but hopefully * message, we have to sleep and try again ... ugly, but hopefully
* won't happen often. * won't happen often.
* *
* XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with an * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with an
* error would leave the no-longer-used file still present on disk, * error would leave the no-longer-used file still present on disk,
* which would be bad, so I'm inclined to assume that the bgwriter * which would be bad, so I'm inclined to assume that the checkpointer
* will always empty the queue soon. * will always empty the queue soon.
*/ */
while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC)) while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */ pg_usleep(10000L); /* 10 msec seems a good number */
/* /*
* Note we don't wait for the bgwriter to actually absorb the revoke * Note we don't wait for the checkpointer to actually absorb the revoke
* message; see mdsync() for the implications. * message; see mdsync() for the implications.
*/ */
} }
......
...@@ -256,7 +256,7 @@ typedef struct RmgrData ...@@ -256,7 +256,7 @@ typedef struct RmgrData
extern const RmgrData RmgrTable[]; extern const RmgrData RmgrTable[];
/* /*
* Exported to support xlog switching from bgwriter * Exported to support xlog switching from checkpointer
*/ */
extern pg_time_t GetLastSegSwitchTime(void); extern pg_time_t GetLastSegSwitchTime(void);
extern XLogRecPtr RequestXLogSwitch(void); extern XLogRecPtr RequestXLogSwitch(void);
......
...@@ -22,6 +22,7 @@ typedef enum ...@@ -22,6 +22,7 @@ typedef enum
BootstrapProcess, BootstrapProcess,
StartupProcess, StartupProcess,
BgWriterProcess, BgWriterProcess,
CheckpointerProcess,
WalWriterProcess, WalWriterProcess,
WalReceiverProcess, WalReceiverProcess,
......
...@@ -23,6 +23,7 @@ extern int CheckPointWarning; ...@@ -23,6 +23,7 @@ extern int CheckPointWarning;
extern double CheckPointCompletionTarget; extern double CheckPointCompletionTarget;
extern void BackgroundWriterMain(void); extern void BackgroundWriterMain(void);
extern void CheckpointerMain(void);
extern void RequestCheckpoint(int flags); extern void RequestCheckpoint(int flags);
extern void CheckpointWriteDelay(int flags, double progress); extern void CheckpointWriteDelay(int flags, double progress);
......
...@@ -190,11 +190,11 @@ extern PROC_HDR *ProcGlobal; ...@@ -190,11 +190,11 @@ extern PROC_HDR *ProcGlobal;
* We set aside some extra PGPROC structures for auxiliary processes, * We set aside some extra PGPROC structures for auxiliary processes,
* ie things that aren't full-fledged backends but need shmem access. * ie things that aren't full-fledged backends but need shmem access.
* *
* Background writer and WAL writer run during normal operation. Startup * Background writer, checkpointer and WAL writer run during normal operation.
* process and WAL receiver also consume 2 slots, but WAL writer is * Startup process and WAL receiver also consume 2 slots, but WAL writer is
* launched only after startup has exited, so we only need 3 slots. * launched only after startup has exited, so we only need 4 slots.
*/ */
#define NUM_AUXILIARY_PROCS 3 #define NUM_AUXILIARY_PROCS 4
/* configurable options */ /* configurable options */
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
/* /*
* Reasons for signalling a Postgres child process (a backend or an auxiliary * Reasons for signalling a Postgres child process (a backend or an auxiliary
* process, like bgwriter). We can cope with concurrent signals for different * process, like checkpointer). We can cope with concurrent signals for different
* reasons. However, if the same reason is signaled multiple times in quick * reasons. However, if the same reason is signaled multiple times in quick
* succession, the process is likely to observe only one notification of it. * succession, the process is likely to observe only one notification of it.
* This is okay for the present uses. * This is okay for the present uses.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment