Commit 3ad0728c authored by Tom Lane's avatar Tom Lane

On systems that have setsid(2) (which should be just about everything except
Windows), arrange for each postmaster child process to be its own process
group leader, and deliver signals SIGINT, SIGTERM, SIGQUIT to the whole
process group, not only the direct child process.  This provides saner behavior
for archive and recovery scripts; in particular, it's possible to shut down a
warm-standby recovery server using "pg_ctl stop -m immediate", since delivery
of SIGQUIT to the startup subprocess will result in killing the waiting
recovery_command.  Also, this makes Query Cancel and statement_timeout apply
to scripts being run from backends via system().  (There is no support in the
core backend for that, but it's widely done using untrusted PLs.)  Per gripe
from Stephen Harris and subsequent discussion.
parent de597154
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.256 2006/11/16 14:28:41 petere Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.257 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,9 +18,10 @@
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/heapam.h"
......@@ -2373,6 +2374,7 @@ RestoreArchivedFile(char *path, const char *xlogfname,
char *endp;
const char *sp;
int rc;
bool signaled;
struct stat stat_buf;
/*
......@@ -2516,13 +2518,28 @@ RestoreArchivedFile(char *path, const char *xlogfname,
}
/*
* remember, we rollforward UNTIL the restore fails so failure here is
* Remember, we rollforward UNTIL the restore fails so failure here is
* just part of the process... that makes it difficult to determine
* whether the restore failed because there isn't an archive to restore,
* or because the administrator has specified the restore program
* incorrectly. We have to assume the former.
*
* However, if the failure was due to any sort of signal, it's best to
* punt and abort recovery. (If we "return false" here, upper levels
* will assume that recovery is complete and start up the database!)
* It's essential to abort on child SIGINT and SIGQUIT, because per spec
* system() ignores SIGINT and SIGQUIT while waiting; if we see one of
* those it's a good bet we should have gotten it too. Aborting on other
* signals such as SIGTERM seems a good idea as well.
*
* Per the Single Unix Spec, shells report exit status > 128 when
* a called command died on a signal. Also, 126 and 127 are used to
* report problems such as an unfindable command; treat those as fatal
* errors too.
*/
ereport(DEBUG2,
signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 125;
ereport(signaled ? FATAL : DEBUG2,
(errmsg("could not restore file \"%s\" from archive: return code %d",
xlogfname, rc)));
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.226 2006/11/21 00:49:54 tgl Exp $
* $PostgreSQL: pgsql/src/backend/bootstrap/bootstrap.c,v 1.227 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -500,6 +500,15 @@ bootstrap_signals(void)
{
if (IsUnderPostmaster)
{
/*
* If possible, make this process a group leader, so that the
* postmaster can signal any child processes too.
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Properly accept or ignore signals the postmaster might send us
*/
......
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.28 2006/11/05 22:42:09 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.29 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -243,6 +243,17 @@ AutoVacMain(int argc, char *argv[])
SetProcessingMode(InitProcessing);
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (autovacuum probably never has
* any child processes, but for consistency we make all postmaster
* child processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Set up signal handlers. We operate on databases much like a regular
* backend, so we use the same signal handling. See equivalent code in
......
......@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.30 2006/11/21 00:49:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.31 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -45,6 +45,7 @@
#include <signal.h>
#include <time.h>
#include <unistd.h>
#include "access/xlog_internal.h"
#include "libpq/pqsignal.h"
......@@ -170,6 +171,17 @@ BackgroundWriterMain(void)
BgWriterShmem->bgwriter_pid = MyProcPid;
am_bg_writer = true;
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (bgwriter probably never has
* any child processes, but for consistency we make all postmaster
* child processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Properly accept or ignore signals the postmaster might send us
*
......
......@@ -19,7 +19,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.26 2006/11/10 22:32:20 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.27 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -29,6 +29,7 @@
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>
#include "access/xlog_internal.h"
......@@ -222,6 +223,15 @@ PgArchiverMain(int argc, char *argv[])
MyProcPid = getpid(); /* reset MyProcPid */
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too.
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Ignore all signals usually bound to some action in the postmaster,
* except for SIGHUP, SIGUSR1 and SIGQUIT.
......@@ -456,9 +466,22 @@ pgarch_archiveXlog(char *xlog)
rc = system(xlogarchcmd);
if (rc != 0)
{
ereport(LOG,
/*
* If either the shell itself, or a called command, died on a signal,
* abort the archiver. We do this because system() ignores SIGINT and
* SIGQUIT while waiting; so a signal is very likely something that
* should have interrupted us too. If we overreact it's no big deal,
* the postmaster will just start the archiver again.
*
* Per the Single Unix Spec, shells report exit status > 128 when
* a called command died on a signal.
*/
bool signaled = WIFSIGNALED(rc) || WEXITSTATUS(rc) > 128;
ereport(signaled ? FATAL : LOG,
(errmsg("archive command \"%s\" failed: return code %d",
xlogarchcmd, rc)));
return false;
}
ereport(LOG,
......
......@@ -13,7 +13,7 @@
*
* Copyright (c) 2001-2006, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.139 2006/10/04 00:29:56 momjian Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.140 2006/11/21 20:59:52 tgl Exp $
* ----------
*/
#include "postgres.h"
......@@ -1613,6 +1613,17 @@ PgstatCollectorMain(int argc, char *argv[])
MyProcPid = getpid(); /* reset MyProcPid */
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (pgstat probably never has
* any child processes, but for consistency we make all postmaster
* child processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Ignore all signals usually bound to some action in the postmaster,
* except SIGQUIT and SIGALRM.
......
......@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.502 2006/11/21 00:49:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.503 2006/11/21 20:59:52 tgl Exp $
*
* NOTES
*
......@@ -268,6 +268,7 @@ static void report_fork_failure_to_client(Port *port, int errnum);
static enum CAC_state canAcceptConnections(void);
static long PostmasterRandom(void);
static void RandomSalt(char *cryptSalt, char *md5Salt);
static void signal_child(pid_t pid, int signal);
static void SignalChildren(int signal);
static int CountChildren(void);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
......@@ -1229,7 +1230,7 @@ ServerLoop(void)
BgWriterPID = StartBackgroundWriter();
/* If shutdown is pending, set it going */
if (Shutdown > NoShutdown && BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
signal_child(BgWriterPID, SIGUSR2);
}
/*
......@@ -1639,7 +1640,7 @@ processCancelRequest(Port *port, void *pkt)
ereport(DEBUG2,
(errmsg_internal("processing cancel request: sending SIGINT to process %d",
backendPID)));
kill(bp->pid, SIGINT);
signal_child(bp->pid, SIGINT);
}
else
/* Right PID, wrong key: no way, Jose */
......@@ -1813,13 +1814,13 @@ SIGHUP_handler(SIGNAL_ARGS)
ProcessConfigFile(PGC_SIGHUP);
SignalChildren(SIGHUP);
if (BgWriterPID != 0)
kill(BgWriterPID, SIGHUP);
signal_child(BgWriterPID, SIGHUP);
if (AutoVacPID != 0)
kill(AutoVacPID, SIGHUP);
signal_child(AutoVacPID, SIGHUP);
if (PgArchPID != 0)
kill(PgArchPID, SIGHUP);
signal_child(PgArchPID, SIGHUP);
if (SysLoggerPID != 0)
kill(SysLoggerPID, SIGHUP);
signal_child(SysLoggerPID, SIGHUP);
/* PgStatPID does not currently need SIGHUP */
/* Reload authentication config files too */
......@@ -1873,7 +1874,7 @@ pmdie(SIGNAL_ARGS)
if (AutoVacPID != 0)
{
/* Use statement cancel to shut it down */
kill(AutoVacPID, SIGINT);
signal_child(AutoVacPID, SIGINT);
break; /* let reaper() handle this */
}
......@@ -1890,13 +1891,13 @@ pmdie(SIGNAL_ARGS)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
signal_child(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
signal_child(PgStatPID, SIGQUIT);
break;
case SIGINT:
......@@ -1921,7 +1922,7 @@ pmdie(SIGNAL_ARGS)
(errmsg("aborting any active transactions")));
SignalChildren(SIGTERM);
if (AutoVacPID != 0)
kill(AutoVacPID, SIGTERM);
signal_child(AutoVacPID, SIGTERM);
/* reaper() does the rest */
}
break;
......@@ -1940,13 +1941,13 @@ pmdie(SIGNAL_ARGS)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
signal_child(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
signal_child(PgStatPID, SIGQUIT);
break;
case SIGQUIT:
......@@ -1960,15 +1961,15 @@ pmdie(SIGNAL_ARGS)
ereport(LOG,
(errmsg("received immediate shutdown request")));
if (StartupPID != 0)
kill(StartupPID, SIGQUIT);
signal_child(StartupPID, SIGQUIT);
if (BgWriterPID != 0)
kill(BgWriterPID, SIGQUIT);
signal_child(BgWriterPID, SIGQUIT);
if (AutoVacPID != 0)
kill(AutoVacPID, SIGQUIT);
signal_child(AutoVacPID, SIGQUIT);
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
signal_child(PgArchPID, SIGQUIT);
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
signal_child(PgStatPID, SIGQUIT);
if (DLGetHead(BackendList))
SignalChildren(SIGQUIT);
ExitPostmaster(0);
......@@ -2065,7 +2066,7 @@ reaper(SIGNAL_ARGS)
* (We could, but don't, try to start autovacuum here.)
*/
if (Shutdown > NoShutdown && BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
signal_child(BgWriterPID, SIGUSR2);
else if (Shutdown == NoShutdown)
{
if (XLogArchivingActive() && PgArchPID == 0)
......@@ -2207,13 +2208,13 @@ reaper(SIGNAL_ARGS)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
signal_child(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
signal_child(PgStatPID, SIGQUIT);
}
reaper_done:
......@@ -2324,7 +2325,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
(errmsg_internal("sending %s to process %d",
(SendStop ? "SIGSTOP" : "SIGQUIT"),
(int) bp->pid)));
kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
signal_child(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
}
}
}
......@@ -2338,7 +2339,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
(errmsg_internal("sending %s to process %d",
(SendStop ? "SIGSTOP" : "SIGQUIT"),
(int) BgWriterPID)));
kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
signal_child(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Take care of the autovacuum daemon too */
......@@ -2350,7 +2351,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
(errmsg_internal("sending %s to process %d",
(SendStop ? "SIGSTOP" : "SIGQUIT"),
(int) AutoVacPID)));
kill(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Force a power-cycle of the pgarch process too */
......@@ -2361,7 +2362,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
(errmsg_internal("sending %s to process %d",
"SIGQUIT",
(int) PgArchPID)));
kill(PgArchPID, SIGQUIT);
signal_child(PgArchPID, SIGQUIT);
}
/* Force a power-cycle of the pgstat process too */
......@@ -2372,7 +2373,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
(errmsg_internal("sending %s to process %d",
"SIGQUIT",
(int) PgStatPID)));
kill(PgStatPID, SIGQUIT);
signal_child(PgStatPID, SIGQUIT);
}
/* We do NOT restart the syslogger */
......@@ -2415,6 +2416,43 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
procname, pid, exitstatus)));
}
/*
 * signal_child -- deliver a signal to one postmaster child process
 *
 * pid:    process ID of the child to signal
 * signal: signal number to deliver
 *
 * The child itself is always signaled.  When setsid() is available, every
 * child makes itself a process group leader, so for SIGINT, SIGTERM,
 * SIGQUIT and SIGSTOP we additionally signal the child's whole process
 * group (kill() with the negated pid).  That is what lets us, for example,
 * SIGQUIT a blocked archive_recovery script or SIGINT a script that a
 * backend is running via system().  All other signals go to the direct
 * child only.
 *
 * Both deliveries are done, rather than just the group one, because a
 * recently-forked child might not have reached its setsid() call yet.  We
 * assume such a child handles the signal before it spawns any grandchild
 * processes, and that receiving the same signal twice is harmless.
 *
 * A failing kill() is only reported at DEBUG3; nothing is returned to the
 * caller.
 */
static void
signal_child(pid_t pid, int signal)
{
	/* First, the direct child -- done unconditionally. */
	if (kill(pid, signal) < 0)
		elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) pid, signal);

#ifdef HAVE_SETSID
	/* Then the child's process group, but only for the signals listed. */
	if (signal == SIGINT || signal == SIGTERM ||
		signal == SIGQUIT || signal == SIGSTOP)
	{
		if (kill(-pid, signal) < 0)
			elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
	}
#endif
}
/*
* Send a signal to all backend children (but NOT special children)
*/
......@@ -2430,7 +2468,7 @@ SignalChildren(int signal)
ereport(DEBUG4,
(errmsg_internal("sending signal %d to process %d",
signal, (int) bp->pid)));
kill(bp->pid, signal);
signal_child(bp->pid, signal);
}
}
......@@ -2641,7 +2679,17 @@ BackendInitialize(Port *port)
whereToSendOutput = DestRemote; /* now safe to ereport to client */
/*
* We arrange for a simple exit(0) if we receive SIGTERM or SIGQUIT during
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (We do this now on the off chance
* that something might spawn a child process during authentication.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* We arrange for a simple exit(1) if we receive SIGTERM or SIGQUIT during
* any client authentication related communication. Otherwise the
* postmaster cannot shutdown the database FAST or IMMED cleanly if a
* buggy client blocks a backend during authentication.
......@@ -3413,7 +3461,7 @@ sigusr1_handler(SIGNAL_ARGS)
{
SignalChildren(SIGUSR1);
if (AutoVacPID != 0)
kill(AutoVacPID, SIGUSR1);
signal_child(AutoVacPID, SIGUSR1);
}
}
......@@ -3424,14 +3472,14 @@ sigusr1_handler(SIGNAL_ARGS)
* Send SIGUSR1 to archiver process, to wake it up and begin archiving
* next transaction log file.
*/
kill(PgArchPID, SIGUSR1);
signal_child(PgArchPID, SIGUSR1);
}
if (CheckPostmasterSignal(PMSIGNAL_ROTATE_LOGFILE) &&
SysLoggerPID != 0)
{
/* Tell syslogger to rotate logfile */
kill(SysLoggerPID, SIGUSR1);
signal_child(SysLoggerPID, SIGUSR1);
}
if (CheckPostmasterSignal(PMSIGNAL_START_AUTOVAC))
......
......@@ -18,7 +18,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/syslogger.c,v 1.28 2006/07/16 20:17:04 tgl Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/syslogger.c,v 1.29 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -182,6 +182,17 @@ SysLoggerMain(int argc, char *argv[])
syslogPipe[1] = 0;
#endif
/*
* If possible, make this process a group leader, so that the postmaster
* can signal any child processes too. (syslogger probably never has
* any child processes, but for consistency we make all postmaster
* child processes do this.)
*/
#ifdef HAVE_SETSID
if (setsid() < 0)
elog(FATAL, "setsid() failed: %m");
#endif
/*
* Properly accept or ignore signals the postmaster might send us
*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.180 2006/10/04 00:29:57 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.181 2006/11/21 20:59:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1243,6 +1243,10 @@ CheckStatementTimeout(void)
/* Time to die */
statement_timeout_active = false;
cancel_from_timeout = true;
#ifdef HAVE_SETSID
/* try to signal whole process group */
kill(-MyProcPid, SIGINT);
#endif
kill(MyProcPid, SIGINT);
}
else
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.517 2006/11/21 00:49:55 tgl Exp $
* $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.518 2006/11/21 20:59:52 tgl Exp $
*
* NOTES
* this is the "main" module of the postgres backend and
......@@ -2989,6 +2989,11 @@ PostgresMain(int argc, char *argv[], const char *username)
if (PostAuthDelay)
pg_usleep(PostAuthDelay * 1000000L);
/*
* You might expect to see a setsid() call here, but it's not needed,
* because if we are under a postmaster then BackendInitialize() did it.
*/
/*
* Set up signal handlers and masks.
*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.54 2006/10/04 00:29:59 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.55 2006/11/21 20:59:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -93,7 +93,12 @@ pg_signal_backend(int pid, int sig)
return false;
}
/* If we have setsid(), signal the backend's whole process group */
#ifdef HAVE_SETSID
if (kill(-pid, sig))
#else
if (kill(pid, sig))
#endif
{
/* Again, just a warning to allow loops */
ereport(WARNING,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment