Commit e82d9e62 authored by Tom Lane's avatar Tom Lane

Adjust elog.c so that elog(FATAL) exits (including cases where ERROR is

promoted to FATAL) end in exit(1) not exit(0).  Then change the postmaster to
allow exit(1) without a system-wide panic, but not for the startup subprocess
or the bgwriter.  There were a couple of places that were using exit(1) to
deliberately force a system-wide panic; adjust these to be exit(2) instead.
This fixes the problem noted back in July that if the startup process exits
with elog(ERROR), the postmaster would think everything is hunky-dory and
proceed to start up.  Alternative solutions such as trying to run the entire
startup process as a critical section seem less clean, primarily because of
the fact that a fair amount of startup code is shared by all postmaster
children in the EXEC_BACKEND case.  We'd need an ugly special case somewhere
near the head of main.c to make it work if it's the child process's
responsibility to determine what happens; and what's the point when the
postmaster already treats different children differently?
parent 778bb7b6
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.225 2006/10/04 00:29:49 momjian Exp $ * $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[]) ...@@ -421,15 +421,8 @@ BootstrapMain(int argc, char *argv[])
case BS_XLOG_STARTUP: case BS_XLOG_STARTUP:
bootstrap_signals(); bootstrap_signals();
StartupXLOG(); StartupXLOG();
/*
* These next two functions don't consider themselves critical,
* but we'd best PANIC anyway if they fail.
*/
START_CRIT_SECTION();
LoadFreeSpaceMap(); LoadFreeSpaceMap();
BuildFlatFiles(false); BuildFlatFiles(false);
END_CRIT_SECTION();
proc_exit(0); /* startup done */ proc_exit(0); /* startup done */
case BS_XLOG_BGWRITER: case BS_XLOG_BGWRITER:
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.29 2006/10/06 17:13:59 petere Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS) ...@@ -503,12 +503,12 @@ bg_quickdie(SIGNAL_ARGS)
* corrupted, so we don't want to try to clean up our transaction. Just * corrupted, so we don't want to try to clean up our transaction. Just
* nail the windows shut and get out of town. * nail the windows shut and get out of town.
* *
* Note we do exit(1) not exit(0). This is to force the postmaster into a * Note we do exit(2) not exit(0). This is to force the postmaster into a
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
* backend. This is necessary precisely because we don't clean up our * backend. This is necessary precisely because we don't clean up our
* shared memory state. * shared memory state.
*/ */
exit(1); exit(2);
} }
/* SIGHUP: set flag to re-read config file at next convenient time */ /* SIGHUP: set flag to re-read config file at next convenient time */
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.501 2006/11/05 22:42:09 tgl Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $
* *
* NOTES * NOTES
* *
...@@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid); ...@@ -358,6 +358,10 @@ static void ShmemBackendArrayRemove(pid_t pid);
#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP) #define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP)
#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER) #define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
/* Macros to check exit status of a child process */
#define EXIT_STATUS_0(st) ((st) == 0)
#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
/* /*
* Postmaster main entry point * Postmaster main entry point
...@@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS) ...@@ -2025,7 +2029,8 @@ reaper(SIGNAL_ARGS)
if (StartupPID != 0 && pid == StartupPID) if (StartupPID != 0 && pid == StartupPID)
{ {
StartupPID = 0; StartupPID = 0;
if (exitstatus != 0) /* Note: FATAL exit of startup is treated as catastrophic */
if (!EXIT_STATUS_0(exitstatus))
{ {
LogChildExit(LOG, _("startup process"), LogChildExit(LOG, _("startup process"),
pid, exitstatus); pid, exitstatus);
...@@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS) ...@@ -2078,7 +2083,8 @@ reaper(SIGNAL_ARGS)
if (BgWriterPID != 0 && pid == BgWriterPID) if (BgWriterPID != 0 && pid == BgWriterPID)
{ {
BgWriterPID = 0; BgWriterPID = 0;
if (exitstatus == 0 && Shutdown > NoShutdown && !FatalError && if (EXIT_STATUS_0(exitstatus) &&
Shutdown > NoShutdown && !FatalError &&
!DLGetHead(BackendList) && AutoVacPID == 0) !DLGetHead(BackendList) && AutoVacPID == 0)
{ {
/* /*
...@@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS) ...@@ -2096,7 +2102,8 @@ reaper(SIGNAL_ARGS)
} }
/* /*
* Any unexpected exit of the bgwriter is treated as a crash. * Any unexpected exit of the bgwriter (including FATAL exit)
* is treated as a crash.
*/ */
HandleChildCrash(pid, exitstatus, HandleChildCrash(pid, exitstatus,
_("background writer process")); _("background writer process"));
...@@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS) ...@@ -2104,15 +2111,16 @@ reaper(SIGNAL_ARGS)
} }
/* /*
* Was it the autovacuum process? Normal exit can be ignored; we'll * Was it the autovacuum process? Normal or FATAL exit can be
* start a new one at the next iteration of the postmaster's main * ignored; we'll start a new one at the next iteration of the
* loop, if necessary. An unexpected exit is treated as a crash. * postmaster's main loop, if necessary. Any other exit condition
* is treated as a crash.
*/ */
if (AutoVacPID != 0 && pid == AutoVacPID) if (AutoVacPID != 0 && pid == AutoVacPID)
{ {
AutoVacPID = 0; AutoVacPID = 0;
autovac_stopped(); autovac_stopped();
if (exitstatus != 0) if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
HandleChildCrash(pid, exitstatus, HandleChildCrash(pid, exitstatus,
_("autovacuum process")); _("autovacuum process"));
continue; continue;
...@@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS) ...@@ -2126,7 +2134,7 @@ reaper(SIGNAL_ARGS)
if (PgArchPID != 0 && pid == PgArchPID) if (PgArchPID != 0 && pid == PgArchPID)
{ {
PgArchPID = 0; PgArchPID = 0;
if (exitstatus != 0) if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("archiver process"), LogChildExit(LOG, _("archiver process"),
pid, exitstatus); pid, exitstatus);
if (XLogArchivingActive() && if (XLogArchivingActive() &&
...@@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS) ...@@ -2143,7 +2151,7 @@ reaper(SIGNAL_ARGS)
if (PgStatPID != 0 && pid == PgStatPID) if (PgStatPID != 0 && pid == PgStatPID)
{ {
PgStatPID = 0; PgStatPID = 0;
if (exitstatus != 0) if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("statistics collector process"), LogChildExit(LOG, _("statistics collector process"),
pid, exitstatus); pid, exitstatus);
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown) if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
...@@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS) ...@@ -2157,7 +2165,7 @@ reaper(SIGNAL_ARGS)
SysLoggerPID = 0; SysLoggerPID = 0;
/* for safety's sake, launch new logger *first* */ /* for safety's sake, launch new logger *first* */
SysLoggerPID = SysLogger_Start(); SysLoggerPID = SysLogger_Start();
if (exitstatus != 0) if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("system logger process"), LogChildExit(LOG, _("system logger process"),
pid, exitstatus); pid, exitstatus);
continue; continue;
...@@ -2229,12 +2237,12 @@ CleanupBackend(int pid, ...@@ -2229,12 +2237,12 @@ CleanupBackend(int pid,
LogChildExit(DEBUG2, _("server process"), pid, exitstatus); LogChildExit(DEBUG2, _("server process"), pid, exitstatus);
/* /*
* If a backend dies in an ugly way (i.e. exit status not 0) then we must * If a backend dies in an ugly way then we must signal all other backends
* signal all other backends to quickdie. If exit status is zero we * to quickdie. If exit status is zero (normal) or one (FATAL exit), we
* assume everything is hunky dory and simply remove the backend from the * assume everything is all right and simply remove the backend from the
* active backend list. * active backend list.
*/ */
if (exitstatus != 0) if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
{ {
HandleChildCrash(pid, exitstatus, _("server process")); HandleChildCrash(pid, exitstatus, _("server process"));
return; return;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.516 2006/10/19 19:52:22 tgl Exp $ * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $
* *
* NOTES * NOTES
* this is the "main" module of the postgres backend and * this is the "main" module of the postgres backend and
...@@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS) ...@@ -2327,12 +2327,12 @@ quickdie(SIGNAL_ARGS)
* corrupted, so we don't want to try to clean up our transaction. Just * corrupted, so we don't want to try to clean up our transaction. Just
* nail the windows shut and get out of town. * nail the windows shut and get out of town.
* *
* Note we do exit(1) not exit(0). This is to force the postmaster into a * Note we do exit(2) not exit(0). This is to force the postmaster into a
* system reset cycle if some idiot DBA sends a manual SIGQUIT to a random * system reset cycle if some idiot DBA sends a manual SIGQUIT to a random
* backend. This is necessary precisely because we don't clean up our * backend. This is necessary precisely because we don't clean up our
* shared memory state. * shared memory state.
*/ */
exit(1); exit(2);
} }
/* /*
...@@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS) ...@@ -2374,7 +2374,7 @@ die(SIGNAL_ARGS)
/* /*
* Timeout or shutdown signal from postmaster during client authentication. * Timeout or shutdown signal from postmaster during client authentication.
* Simply exit(0). * Simply exit(1).
* *
* XXX: possible future improvement: try to send a message indicating * XXX: possible future improvement: try to send a message indicating
* why we are disconnecting. Problem is to be sure we don't block while * why we are disconnecting. Problem is to be sure we don't block while
...@@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS) ...@@ -2383,7 +2383,7 @@ die(SIGNAL_ARGS)
void void
authdie(SIGNAL_ARGS) authdie(SIGNAL_ARGS)
{ {
exit(0); exit(1);
} }
/* /*
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.175 2006/10/01 22:08:18 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.176 2006/11/21 00:49:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -421,25 +421,23 @@ errfinish(int dummy,...) ...@@ -421,25 +421,23 @@ errfinish(int dummy,...)
* fflush here is just to improve the odds that we get to see the * fflush here is just to improve the odds that we get to see the
* error message, in case things are so hosed that proc_exit crashes. * error message, in case things are so hosed that proc_exit crashes.
* Any other code you might be tempted to add here should probably be * Any other code you might be tempted to add here should probably be
* in an on_proc_exit callback instead. * in an on_proc_exit or on_shmem_exit callback instead.
*/ */
fflush(stdout); fflush(stdout);
fflush(stderr); fflush(stderr);
/* /*
* If proc_exit is already running, we exit with nonzero exit code to * Do normal process-exit cleanup, then return exit code 1 to indicate
* indicate that something's pretty wrong. We also want to exit with * FATAL termination. The postmaster may or may not consider this
* nonzero exit code if not running under the postmaster (for example, * worthy of panic, depending on which subprocess returns it.
* if we are being run from the initdb script, we'd better return an
* error status).
*/ */
proc_exit(proc_exit_inprogress || !IsUnderPostmaster); proc_exit(1);
} }
if (elevel >= PANIC) if (elevel >= PANIC)
{ {
/* /*
* Serious crash time. Postmaster will observe nonzero process exit * Serious crash time. Postmaster will observe SIGABRT process exit
* status and kill the other backends too. * status and kill the other backends too.
* *
* XXX: what if we are *in* the postmaster? abort() won't kill our * XXX: what if we are *in* the postmaster? abort() won't kill our
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment