Commit 66ec2db7 authored by Tom Lane's avatar Tom Lane

XLOG file archiving and point-in-time recovery. There are still some

loose ends and a glaring lack of documentation, but it basically works.

Simon Riggs with some editorialization by Tom Lane.
parent d0c1bbdc
......@@ -56,7 +56,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.83 2004/07/11 18:01:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.84 2004/07/19 02:47:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -64,6 +64,7 @@
#include "postgres.h"
#include "access/nbtree.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "storage/smgr.h"
#include "utils/tuplesort.h"
......@@ -222,15 +223,9 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
/*
* We need to log index creation in WAL iff WAL archiving is enabled
* AND it's not a temp index.
*
* XXX when WAL archiving is actually supported, this test will likely
* need to change; and the hardwired extern is cruddy anyway ...
*/
{
extern char XLOG_archive_dir[];
wstate.btws_use_wal = XLogArchivingActive() && !wstate.index->rd_istemp;
wstate.btws_use_wal = XLOG_archive_dir[0] && !wstate.index->rd_istemp;
}
/* reserve the metapage */
wstate.btws_pages_alloced = BTREE_METAPAGE + 1;
wstate.btws_pages_written = 0;
......
# -------------------------------
# PostgreSQL recovery config file
# -------------------------------
#
# Edit this file to provide the parameters that PostgreSQL
# needs to perform an archive recovery of a database
#
# If "recovery.conf" is present in the PostgreSQL data directory, it is
# read on postmaster startup. After successful recovery, it is renamed
# to "recovery.done" to ensure that we do not accidentally re-enter archive
# recovery mode.
#
# This file consists of lines of the form:
#
# name = 'value'
#
# (The quotes around the value are NOT optional, but the "=" is.)
#
# Comments are introduced with '#'.
#
# The complete list of option names and
# allowed values can be found in the PostgreSQL documentation. The
# commented-out settings shown below are sample values.
#
#---------------------------------------------------------------------------
# REQUIRED PARAMETERS
#---------------------------------------------------------------------------
#
# restore command
#
# specifies the shell command that is executed to copy log files
# back from archival storage. The command string may contain %f,
# which is replaced by the name of the desired log file, and %p,
# which is replaced by the absolute path to copy the log file to.
#
# It is important that the command return nonzero exit status on failure.
# The command *will* be asked for log files that are not present in the
# archive; it must return nonzero when so asked.
#
# NOTE that the basename of %p will be different from %f; do not
# expect them to be interchangeable.
#
#
#restore_command = 'cp /mnt/server/archivedir/%f %p'
#
#
#---------------------------------------------------------------------------
# OPTIONAL PARAMETERS
#---------------------------------------------------------------------------
#
# By default, recovery will roll forward to the end of the WAL.
# If you want to stop rolling forward before that point, you
# MUST set a recovery target.
#
# You may set a recovery target either by transaction ID or
# by timestamp. If both are given, recovery_target_xid takes
# precedence. Recovery may either include or exclude the
# records with the recovery target value (i.e., stop either just
# after or just before the given target).
#
#recovery_target_time = '2004-07-14 22:39:00'
#
# note: target time is interpreted by strptime() and must therefore be
# given in your system's default timezone.
#
#recovery_target_xid = '11000'
#
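# recovery_target_inclusive: 'true' (the default) stops just after the
# recovery target, 'false' stops just before it. It has no effect unless
# a recovery target is also set.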
#recovery_target_inclusive = 'true'
#
#---------------------------------------------------------------------------
......@@ -7,13 +7,14 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.147 2004/07/01 00:49:50 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.148 2004/07/19 02:47:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include <fcntl.h>
#include <signal.h>
#include <time.h>
......@@ -33,6 +34,7 @@
#include "storage/bufpage.h"
#include "storage/fd.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "storage/spin.h"
......@@ -87,10 +89,9 @@
/* User-settable parameters */
int CheckPointSegments = 3;
int XLOGbuffers = 8;
char *XLogArchiveCommand = NULL;
char *XLOG_sync_method = NULL;
const char XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
char XLOG_archive_dir[MAXPGPATH]; /* null string means
* delete 'em */
#ifdef WAL_DEBUG
bool XLOG_DEBUG = false;
......@@ -125,8 +126,19 @@ static int open_sync_bit = DEFAULT_SYNC_FLAGBIT;
*/
StartUpID ThisStartUpID = 0;
/* Are we doing recovery by reading XLOG? */
/* Are we doing recovery from XLOG? */
bool InRecovery = false;
/* Are we recovering using offline XLOG archives? */
static bool InArchiveRecovery = false;
/* Was the last file restored from archive, or local? */
static bool restoredFromArchive = false;
/*
 * Shell command template taken from restore_command in recovery.conf;
 * %f and %p are expanded each time a segment is fetched.
 */
static char recoveryRestoreCommand[MAXPGPATH];
/* True once recovery.conf has supplied a target xid or a target time */
static bool recoveryTarget = false;
/* True if the target is a transaction id, false if it is a timestamp */
static bool recoveryTargetExact = false;
/* Value of recovery_target_inclusive: stop just after (true) or before (false) the target */
static bool recoveryTargetInclusive = true;
/* Target transaction id; meaningful only when recoveryTargetExact is true */
static TransactionId recoveryTargetXid;
/* Target time; meaningful only when recoveryTarget is set and recoveryTargetExact is false */
static time_t recoveryTargetTime;
/*
* MyLastRecPtr points to the start of the last XLOG record inserted by the
......@@ -369,10 +381,6 @@ static ControlFileData *ControlFile = NULL;
((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
#define XLogFileName(path, log, seg) \
snprintf(path, MAXPGPATH, "%s/%08X%08X", \
XLogDir, log, seg)
#define PrevBufIdx(idx) \
(((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))
......@@ -383,6 +391,21 @@ static ControlFileData *ControlFile = NULL;
((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \
(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
/*
 * These macros encapsulate knowledge about the exact layout of XLog file
 * names as well as archive-status file names.
 *
 * MAXFNAMELEN    - size of a buffer meant to hold a bare segment file name.
 * XLogFileName   - writes the bare segment name (log and seg, each as eight
 *                  uppercase hex digits) into fname, bounded by MAXFNAMELEN.
 * XLogFilePath   - writes "<XLogDir>/<segment name>" into path, bounded by
 *                  MAXPGPATH.
 * StatusFilePath - writes "<XLogDir>/archive_status/<xlog><suffix>" into
 *                  path, bounded by MAXPGPATH; xlog is a bare segment name
 *                  and suffix is a string such as ".ready" or ".done".
 *
 * All of them expand to a single snprintf() call, so output that does not
 * fit is truncated rather than overrunning the buffer.
 */
#define MAXFNAMELEN 32
#define XLogFileName(fname, log, seg) \
snprintf(fname, MAXFNAMELEN, "%08X%08X", log, seg)
#define XLogFilePath(path, log, seg) \
snprintf(path, MAXPGPATH, "%s/%08X%08X", XLogDir, log, seg)
#define StatusFilePath(path, xlog, suffix) \
snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix)
/*
* _INTL_MAXLOGRECSZ: max space needed for a record including header and
* any backup-block data.
......@@ -434,6 +457,14 @@ static StartUpID lastReadSUI;
static bool InRedo = false;
static void XLogArchiveNotify(const char *xlog);
static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
static bool XLogArchiveIsDone(const char *xlog);
static void XLogArchiveCleanup(const char *xlog);
static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg,
uint32 xrecoff);
static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
static bool AdvanceXLInsertBuffer(void);
static bool WasteXLInsertBuffer(void);
......@@ -444,6 +475,7 @@ static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
bool find_free, int max_advance,
bool use_lock);
static int XLogFileOpen(uint32 log, uint32 seg, bool econt);
static void RestoreArchivedXLog(char *path, uint32 log, uint32 seg);
static void PreallocXlogFiles(XLogRecPtr endptr);
static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
......@@ -911,6 +943,110 @@ begin:;
return (RecPtr);
}
/*
 * XLogArchiveNotify
 *
 * Tell the archiver that segment "xlog" is ready to be archived.
 *
 * The request is conveyed purely by the name of an empty status file:
 * for segment 00000001000000C6 we create 00000001000000C6.ready, which
 * the archiver picks up, archives XLogDir/00000001000000C6, and then
 * renames to 00000001000000C6.done.
 *
 * Failure to create or close the status file is only logged (LOG level);
 * in that case the archiver is not signalled.
 */
static void
XLogArchiveNotify(const char *xlog)
{
    char        readyPath[MAXPGPATH];
    FILE       *readyFile;

    /* The file carries no content; its existence is the message */
    StatusFilePath(readyPath, xlog, ".ready");
    readyFile = AllocateFile(readyPath, "w");
    if (readyFile == NULL)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not create archive status file \"%s\": %m",
                        readyPath)));
        return;
    }

    if (FreeFile(readyFile) != 0)
    {
        ereport(LOG,
                (errcode_for_file_access(),
                 errmsg("could not write archive status file \"%s\": %m",
                        readyPath)));
        return;
    }

    /* Nobody to wake up unless we are running under a postmaster */
    if (!IsUnderPostmaster)
        return;

    SendPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER);
}
/*
 * XLogArchiveNotifySeg
 *
 * Same as XLogArchiveNotify, but the segment is identified by its
 * log/seg numbers instead of by file name.
 */
static void
XLogArchiveNotifySeg(uint32 log, uint32 seg)
{
    char        segName[MAXFNAMELEN];

    /* Build the bare file name, then hand off to the name-based routine */
    XLogFileName(segName, log, seg);
    XLogArchiveNotify(segName);
}
/*
 * XLogArchiveIsDone
 *
 * Decide whether an old XLOG segment may be deleted or recycled, based on
 * its archive status files.
 *
 * Returns true if <XLOG>.done exists.  Returns false if <XLOG>.ready
 * exists (archiver has not finished with it).  If neither exists, the
 * .ready file is (re)created and false is returned; this covers the case
 * where the original attempt to create <XLOG>.ready failed.
 */
static bool
XLogArchiveIsDone(const char *xlog)
{
    char        statusPath[MAXPGPATH];
    struct stat st;

    /* Expected case: the archiver has already marked the segment done */
    StatusFilePath(statusPath, xlog, ".done");
    if (stat(statusPath, &st) != 0)
    {
        /* Not done; a .ready file means the archiver is still busy */
        StatusFilePath(statusPath, xlog, ".ready");
        if (stat(statusPath, &st) == 0)
            return false;

        /*
         * Neither file was seen.  The archiver may have renamed .ready to
         * .done between our two checks, so look for .done once more.
         */
        StatusFilePath(statusPath, xlog, ".done");
        if (stat(statusPath, &st) != 0)
        {
            /* Really no status file at all: retry creating .ready */
            XLogArchiveNotify(xlog);
            return false;
        }
    }

    return true;
}
/*
 * XLogArchiveCleanup
 *
 * Remove the ".done" archive status file belonging to segment "xlog".
 */
static void
XLogArchiveCleanup(const char *xlog)
{
    char        donePath[MAXPGPATH];

    StatusFilePath(donePath, xlog, ".done");

    /* The result is ignored; it is an open question whether to complain */
    (void) unlink(donePath);
}
/*
* Advance the Insert state to the next buffer page, writing out the next
* buffer if it still contains unwritten data.
......@@ -1255,11 +1391,17 @@ XLogWrite(XLogwrtRqst WriteRqst)
* and re-open prior segments when an fsync request comes along
* later. Doing it here ensures that one and only one backend will
* perform this fsync.
*
* This is also the right place to notify the Archiver that the
* segment is ready to copy to archival storage.
*/
if (openLogOff >= XLogSegSize && !ispartialpage)
{
issue_xlog_fsync();
LogwrtResult.Flush = LogwrtResult.Write; /* end of current page */
if (XLogArchivingActive())
XLogArchiveNotifySeg(openLogId, openLogSeg);
}
if (ispartialpage)
......@@ -1475,7 +1617,7 @@ XLogFileInit(uint32 log, uint32 seg,
int fd;
int nbytes;
XLogFileName(path, log, seg);
XLogFilePath(path, log, seg);
/*
* Try to use existent file (checkpoint maker may have created it
......@@ -1621,7 +1763,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
char path[MAXPGPATH];
struct stat stat_buf;
XLogFileName(path, log, seg);
XLogFilePath(path, log, seg);
/*
* We want to be sure that only one process does this at a time.
......@@ -1647,7 +1789,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
return false;
}
NextLogSeg(log, seg);
XLogFileName(path, log, seg);
XLogFilePath(path, log, seg);
}
}
......@@ -1686,7 +1828,10 @@ XLogFileOpen(uint32 log, uint32 seg, bool econt)
char path[MAXPGPATH];
int fd;
XLogFileName(path, log, seg);
if (InArchiveRecovery)
RestoreArchivedXLog(path, log, seg);
else
XLogFilePath(path, log, seg);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
S_IRUSR | S_IWUSR);
......@@ -1706,9 +1851,192 @@ XLogFileOpen(uint32 log, uint32 seg, bool econt)
path, log, seg)));
}
/*
* XXX this is a pretty horrid hack. Remove after implementing timelines.
*
* if we switched back to local xlogs after having been
* restoring from archive, we need to make sure that the
* local files don't get removed by end-of-recovery checkpoint
* in case we need to re-run the recovery
*
* we want to copy these away as soon as possible, so set
* the archive status flag to .ready for them
* in case admin isn't cautious enough to have done this anyway
*
* XXX this is completely broken, because there is no guarantee this file
* is actually complete and ready to be archived. Also, what if there's
* a .done file for them?
*/
if (InArchiveRecovery && !restoredFromArchive)
XLogArchiveNotifySeg(log, seg);
return (fd);
}
/*
 * RestoreArchivedXLog
 *
 * Fetch the next logfile segment from off-line archival storage during
 * archive recovery.
 *
 * On success, "path" receives the full path of the restored copy (a temp
 * file name, XLogDir/RECOVERYXLOG, not the normal segment name) and
 * restoredFromArchive is set true.
 *
 * On failure, "path" receives the name of the normal on-line segment file
 * (which may or may not exist; the caller will try to open it) and
 * restoredFromArchive is set false.
 */
static void
RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
{
    char        segName[MAXFNAMELEN];
    char        tmpPath[MAXPGPATH];
    char        cmd[MAXPGPATH];
    char       *out;
    char       *limit;
    const char *in;
    int         status;
    struct stat st;

    /*
     * During archive recovery an archived segment is always preferred over
     * a same-named file in XLogDir, because the local file could be an old,
     * un-filled or partly-filled version that came along with a $PGDATA
     * backup.  Comparing timestamps of the two copies is not attempted:
     * there is no API for it and the archive copy's timestamp may not have
     * been preserved; robustness is the goal.
     *
     * Only if the archive cannot supply the segment do we fall back to the
     * XLogDir file, which lets us use segments that had not yet been
     * transferred to the archive.
     *
     * The restored copy never overwrites a local segment: the restore
     * command might deliver an inappropriate or corrupt file, and we may
     * later want the segments still present in XLogDir.  Using one fixed
     * temp name for every restored segment also keeps disk usage bounded on
     * long recoveries.
     */
    XLogFileName(segName, log, seg);
    snprintf(tmpPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);

    /*
     * Get rid of any RECOVERYXLOG left from a previous fetch.
     */
    if (stat(tmpPath, &st) == 0)
    {
        if (unlink(tmpPath) != 0)
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not remove \"%s\": %m",
                            tmpPath)));
    }
    else if (errno != ENOENT)
        ereport(FATAL,
                (errcode_for_file_access(),
                 errmsg("could not stat \"%s\": %m",
                        tmpPath)));

    /*
     * Expand recoveryRestoreCommand into cmd: %p becomes the target path,
     * %f the wanted file name, %% a single %.  Any other % is copied as-is.
     * Output beyond the buffer is silently dropped.
     */
    out = cmd;
    limit = cmd + MAXPGPATH - 1;
    *limit = '\0';

    in = recoveryRestoreCommand;
    while (*in != '\0')
    {
        const char *expansion = NULL;

        if (*in == '%')
        {
            if (in[1] == 'p')
                expansion = tmpPath;    /* %p: full path of target file */
            else if (in[1] == 'f')
                expansion = segName;    /* %f: filename of desired file */
            else if (in[1] == '%')
                in++;                   /* %%: step to the 2nd %, copied below */
        }

        if (expansion != NULL)
        {
            in++;                       /* step onto the format letter */
            StrNCpy(out, expansion, limit - out);
            out += strlen(out);
        }
        else if (out < limit)
            *out++ = *in;

        in++;
    }
    *out = '\0';

    ereport(DEBUG3,
            (errmsg_internal("executing restore command \"%s\"",
                             cmd)));

    /*
     * Run the command that copies the segment from the archive to XLogDir.
     */
    status = system(cmd);
    if (status == 0)
    {
        /* Command claims success; verify the file actually showed up */
        if (stat(tmpPath, &st) == 0)
        {
            ereport(LOG,
                    (errmsg("restored log file \"%s\" from archive",
                            segName)));
            strcpy(path, tmpPath);
            restoredFromArchive = true;
            return;
        }
        if (errno != ENOENT)
            ereport(FATAL,
                    (errcode_for_file_access(),
                     errmsg("could not stat \"%s\": %m",
                            tmpPath)));
    }

    /*
     * Roll-forward continues UNTIL a restore fails, so a failure here is a
     * normal part of the process.  We cannot tell "no such file in the
     * archive" apart from "restore command is misconfigured"; the former
     * has to be assumed.
     */
    ereport(DEBUG1,
            (errmsg("could not restore \"%s\" from archive: return code %d",
                    segName, status)));

    /*
     * No archived copy: hand back the normal XLogDir name.  In many
     * recovery scenarios that file is missing too, which simply means the
     * end of WAL has been reached.
     */
    XLogFilePath(path, log, seg);
    restoredFromArchive = false;
}
/*
* Preallocate log files beyond the specified log endpoint, according to
* the XLOGfile user parameter.
......@@ -1745,7 +2073,7 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr)
uint32 endlogSeg;
DIR *xldir;
struct dirent *xlde;
char lastoff[32];
char lastoff[MAXFNAMELEN];
char path[MAXPGPATH];
XLByteToPrevSeg(endptr, endlogId, endlogSeg);
......@@ -1757,25 +2085,30 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr)
errmsg("could not open transaction log directory \"%s\": %m",
XLogDir)));
sprintf(lastoff, "%08X%08X", log, seg);
XLogFileName(lastoff, log, seg);
errno = 0;
while ((xlde = readdir(xldir)) != NULL)
{
/*
* use the alphanumeric sorting property of the filenames to decide
* which ones are earlier than the lastoff segment
*/
if (strlen(xlde->d_name) == 16 &&
strspn(xlde->d_name, "0123456789ABCDEF") == 16 &&
strcmp(xlde->d_name, lastoff) <= 0)
{
snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
if (XLOG_archive_dir[0])
{
ereport(LOG,
(errmsg("archiving transaction log file \"%s\"",
xlde->d_name)));
elog(WARNING, "archiving log files is not implemented");
}
else
bool recycle;
if (XLogArchivingActive())
recycle = XLogArchiveIsDone(xlde->d_name);
else
recycle = true;
if (recycle)
{
snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlde->d_name);
/*
* Before deleting the file, see if it can be recycled as
* a future log segment. We allow recycling segments up
......@@ -1794,10 +2127,12 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr)
{
/* No need for any more future segments... */
ereport(LOG,
(errmsg("removing transaction log file \"%s\"",
xlde->d_name)));
(errmsg("removing transaction log file \"%s\"",
xlde->d_name)));
unlink(path);
}
XLogArchiveCleanup(xlde->d_name);
}
}
errno = 0;
......@@ -2771,6 +3106,401 @@ str_time(time_t tnow)
return buf;
}
/*
* See if there is a recovery command file (recovery.conf), and if so
* read in parameters for archive recovery.
*
* XXX longer term intention is to expand this to
* cater for additional parameters and controls
* possibly use a flex lexer similar to the GUC one
*/
static void
readRecoveryCommandFile(void)
{
char recoveryCommandFile[MAXPGPATH];
FILE *fd;
char cmdline[MAXPGPATH];
bool syntaxError = false;
snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
fd = AllocateFile(recoveryCommandFile, "r");
if (fd == NULL)
{
if (errno == ENOENT)
return; /* not there, so no archive recovery */
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not open recovery command file \"%s\": %m",
recoveryCommandFile)));
}
ereport(LOG,
(errmsg("starting archive recovery")));
/*
* Parse the file...
*/
while (fgets(cmdline, MAXPGPATH, fd) != NULL)
{
/* skip leading whitespace and check for # comment */
char *ptr;
char *tok1;
char *tok2;
for (ptr = cmdline; *ptr; ptr++)
{
if (!isspace((unsigned char) *ptr))
break;
}
if (*ptr == '\0' || *ptr == '#')
continue;
/* identify the quoted parameter value */
tok1 = strtok(ptr, "'");
if (!tok1)
{
syntaxError = true;
break;
}
tok2 = strtok(NULL, "'");
if (!tok2)
{
syntaxError = true;
break;
}
/* reparse to get just the parameter name */
tok1 = strtok(ptr, " \t=");
if (!tok1)
{
syntaxError = true;
break;
}
if (strcmp(tok1,"restore_command") == 0) {
StrNCpy(recoveryRestoreCommand, tok2, MAXPGPATH);
ereport(LOG,
(errmsg("restore_command = \"%s\"",
recoveryRestoreCommand)));
}
else if (strcmp(tok1,"recovery_target_xid") == 0) {
errno = 0;
recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0);
if (errno == EINVAL || errno == ERANGE)
ereport(FATAL,
(errmsg("recovery_target_xid is not a valid number: \"%s\"",
tok2)));
ereport(LOG,
(errmsg("recovery_target_xid = %u",
recoveryTargetXid)));
recoveryTarget = true;
recoveryTargetExact = true;
}
else if (strcmp(tok1,"recovery_target_time") == 0) {
struct tm tm;
/*
* if recovery_target_xid specified, then this overrides
* recovery_target_time
*/
if (recoveryTargetExact)
continue;
recoveryTarget = true;
recoveryTargetExact = false;
/*
* convert the time string given
* by the user to the time_t format.
*/
if (strptime(tok2, "%Y-%m-%d %H:%M:%S", &tm) == NULL)
ereport(FATAL,
(errmsg("invalid recovery_target_time \"%s\"",
tok2),
errhint("Correct format is YYYY-MM-DD hh:mm:ss.")));
recoveryTargetTime = mktime(&tm);
if (recoveryTargetTime == (time_t) -1)
ereport(FATAL,
(errmsg("invalid recovery_target_time \"%s\"",
tok2),
errhint("Correct format is YYYY-MM-DD hh:mm:ss.")));
ereport(LOG,
(errmsg("recovery_target_time = %s",
tok2)));
}
else if (strcmp(tok1,"recovery_target_inclusive") == 0) {
/*
* does nothing if a recovery_target is not also set
*/
if (strcmp(tok2, "true") == 0)
recoveryTargetInclusive = true;
else
{
recoveryTargetInclusive = false;
tok2 = "false";
}
ereport(LOG,
(errmsg("recovery_target_inclusive = %s", tok2)));
}
else
ereport(FATAL,
(errmsg("unrecognized recovery parameter \"%s\"",
tok1)));
}
FreeFile(fd);
if (syntaxError)
ereport(FATAL,
(errmsg("syntax error in recovery command file: %s",
cmdline),
errhint("Lines should have the format parameter = 'value'.")));
/* Check that required parameters were supplied */
if (recoveryRestoreCommand[0] == '\0')
ereport(FATAL,
(errmsg("recovery command file \"%s\" did not specify restore_command",
recoveryCommandFile)));
/*
* clearly indicate our state
*/
InArchiveRecovery = true;
}
/*
 * Exit archive-recovery state
 *
 * Called once WAL replay is finished while InArchiveRecovery is set.
 *
 * endLogId/endLogSeg identify the WAL segment containing the end of the
 * valid (replayed) WAL; xrecoff is the byte offset of that end point.
 *
 * In order, this routine:
 *	- turns off InArchiveRecovery and closes the segment open for reading;
 *	- if the last segment came from the archive, moves RECOVERYXLOG into
 *	  place under the segment's normal name, else removes any leftover
 *	  RECOVERYXLOG;
 *	- if a recovery target was set and the end point is not at a segment
 *	  boundary, zero-fills the rest of the segment after the page holding
 *	  the end point and fsyncs it;
 *	- renames recovery.conf to recovery.done.
 *
 * Rename failures are FATAL; failures while zero-filling are PANIC.
 */
static void
exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
{
char recoveryPath[MAXPGPATH];
char xlogpath[MAXPGPATH];
char recoveryCommandFile[MAXPGPATH];
char recoveryCommandDone[MAXPGPATH];
/*
 * Disable fetches from archive, so we can use XLogFileOpen below.
 */
InArchiveRecovery = false;
/*
 * We should have the ending log segment currently open. Verify,
 * and then close it (to avoid problems on Windows with trying to
 * rename or delete an open file).
 */
Assert(readFile >= 0);
Assert(readId == endLogId);
Assert(readSeg == endLogSeg);
close(readFile);
readFile = -1;
/*
 * If the segment was fetched from archival storage, we want to replace
 * the existing xlog segment (if any) with the archival version. This
 * is because whatever is in XLogDir is very possibly older than what
 * we have from the archives, since it could have come from restoring
 * a PGDATA backup. In any case, the archival version certainly is
 * more descriptive of what our current database state is, because that
 * is what we replayed from.
 *
 * XXX there ought to be a timeline increment somewhere around here.
 */
snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
XLogFilePath(xlogpath, endLogId, endLogSeg);
if (restoredFromArchive)
{
ereport(DEBUG3,
(errmsg_internal("moving last restored xlog to \"%s\"",
xlogpath)));
unlink(xlogpath); /* might or might not exist */
if (rename(recoveryPath, xlogpath) != 0)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not rename \"%s\" to \"%s\": %m",
recoveryPath, xlogpath)));
/* XXX might we need to fix permissions on the file? */
}
else
{
/*
 * If the latest segment is not archival, but there's still a
 * RECOVERYXLOG laying about, get rid of it.
 */
unlink(recoveryPath); /* ignore any error */
}
/*
 * If we restored to a point-in-time, then the current WAL segment
 * probably contains records beyond the stop point. These represent an
 * extreme hazard: if we crash in the near future, the replay apparatus
 * will know no reason why it shouldn't replay them. Therefore,
 * explicitly zero out all the remaining pages of the segment. (We need
 * not worry about the partial page in which the last record ends, since
 * StartupXLOG will handle zeroing that. Also, there's nothing to do
 * if we are right at a segment boundary.)
 *
 * XXX segment files beyond the current one also represent a hazard
 * for the same reason. Need to invent timelines to fix this.
 */
/* align xrecoff to next page, then drop segment part */
if (xrecoff % BLCKSZ != 0)
xrecoff += (BLCKSZ - xrecoff % BLCKSZ);
xrecoff %= XLogSegSize;
/* xrecoff == 0 here means the aligned end point is a segment boundary */
if (recoveryTarget && xrecoff != 0)
{
int fd;
char zbuffer[BLCKSZ];
/* InArchiveRecovery is already false, so this opens the XLogDir file */
fd = XLogFileOpen(endLogId, endLogSeg, false);
MemSet(zbuffer, 0, sizeof(zbuffer));
if (lseek(fd, (off_t) xrecoff, SEEK_SET) < 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not seek in file \"%s\": %m",
xlogpath)));
/* write one zeroed block at a time until the end of the segment */
for (; xrecoff < XLogSegSize; xrecoff += sizeof(zbuffer))
{
errno = 0;
if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", xlogpath)));
}
}
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", xlogpath)));
if (close(fd))
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not close file \"%s\": %m", xlogpath)));
}
/*
 * Rename the config file out of the way, so that we don't accidentally
 * re-enter archive recovery mode in a subsequent crash.
 */
snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
snprintf(recoveryCommandDone, MAXPGPATH, "%s/recovery.done", DataDir);
/* remove any recovery.done left by an earlier recovery; result ignored */
unlink(recoveryCommandDone);
if (rename(recoveryCommandFile, recoveryCommandDone) != 0)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not rename \"%s\" to \"%s\": %m",
recoveryCommandFile, recoveryCommandDone)));
ereport(LOG,
(errmsg("archive recovery complete")));
}
/*
 * recoveryStopsHere
 *
 * For point-in-time recovery, decide whether replay of the XLOG should
 * stop at or after the given record.
 *
 * Returns TRUE if we are stopping, FALSE otherwise.  On TRUE return,
 * *includeThis is set TRUE if this record should still be applied before
 * stopping; on FALSE return *includeThis is left untouched.
 *
 * Only transaction COMMIT and ABORT records are ever stop points.
 */
static bool
recoveryStopsHere(XLogRecord *record, bool *includeThis)
{
    uint8       xactOp;
    bool        isCommit;
    time_t      endTime;

    /* Nothing to decide without a PITR target */
    if (!recoveryTarget)
        return false;

    /* Only records of the transaction resource manager are of interest */
    if (record->xl_rmid != RM_XACT_ID)
        return false;

    /* Pick up the transaction end time from a COMMIT or ABORT record */
    xactOp = record->xl_info & ~XLR_INFO_MASK;
    isCommit = (xactOp == XLOG_XACT_COMMIT);
    if (isCommit)
        endTime = ((xl_xact_commit *) XLogRecGetData(record))->xtime;
    else if (xactOp == XLOG_XACT_ABORT)
        endTime = ((xl_xact_abort *) XLogRecGetData(record))->xtime;
    else
        return false;

    if (recoveryTargetExact)
    {
        /*
         * Target is a transaction id.  Exactly one transaction end record
         * carries it, and the test MUST be for equality: xids are assigned
         * in start order, not completion order, so a higher-numbered xid
         * may well finish before a lower-numbered one.
         */
        if (record->xl_xid != recoveryTargetXid)
            return false;
        *includeThis = recoveryTargetInclusive;
    }
    else
    {
        /*
         * Target is a timestamp.  Many transactions can share one end
         * time, so when inclusive we keep going through all records at the
         * target time and stop at the first later one; when exclusive we
         * stop at the first record at or past the target time.  Either
         * way the record we stop on is not applied.
         */
        bool        beforeStop;

        if (recoveryTargetInclusive)
            beforeStop = (endTime <= recoveryTargetTime);
        else
            beforeStop = (endTime < recoveryTargetTime);
        if (beforeStop)
            return false;
        *includeThis = false;
    }

    /* We are stopping: report where and how */
    if (isCommit && *includeThis)
        ereport(LOG,
                (errmsg("recovery stopping after commit of transaction %u, time %s",
                        record->xl_xid, str_time(endTime))));
    else if (isCommit)
        ereport(LOG,
                (errmsg("recovery stopping before commit of transaction %u, time %s",
                        record->xl_xid, str_time(endTime))));
    else if (*includeThis)
        ereport(LOG,
                (errmsg("recovery stopping after abort of transaction %u, time %s",
                        record->xl_xid, str_time(endTime))));
    else
        ereport(LOG,
                (errmsg("recovery stopping before abort of transaction %u, time %s",
                        record->xl_xid, str_time(endTime))));

    return true;
}
/*
* This must be called ONCE during postmaster or standalone-backend startup
*/
......@@ -2784,6 +3514,8 @@ StartupXLOG(void)
LastRec,
checkPointLoc,
EndOfLog;
uint32 endLogId;
uint32 endLogSeg;
XLogRecord *record;
char *buffer;
uint32 freespace;
......@@ -2833,6 +3565,12 @@ StartupXLOG(void)
pg_usleep(60000000L);
#endif
/*
* Check for recovery control file, and if so set up state for
* offline recovery
*/
readRecoveryCommandFile();
/*
* Get the last valid checkpoint record. If the latest one according
* to pg_control is broken, try the next-to-last one.
......@@ -2944,20 +3682,19 @@ StartupXLOG(void)
if (record != NULL)
{
bool recoveryContinue = true;
bool recoveryApply = true;
InRedo = true;
ereport(LOG,
(errmsg("redo starts at %X/%X",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
/*
* main redo apply loop
*/
do
{
/* nextXid must be beyond record's xid */
if (TransactionIdFollowsOrEquals(record->xl_xid,
ShmemVariableCache->nextXid))
{
ShmemVariableCache->nextXid = record->xl_xid;
TransactionIdAdvance(ShmemVariableCache->nextXid);
}
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
{
......@@ -2974,16 +3711,40 @@ StartupXLOG(void)
}
#endif
/*
* Have we reached our recovery target?
*/
if (recoveryStopsHere(record, &recoveryApply))
{
recoveryContinue = false;
if (!recoveryApply)
break;
}
/* nextXid must be beyond record's xid */
if (TransactionIdFollowsOrEquals(record->xl_xid,
ShmemVariableCache->nextXid))
{
ShmemVariableCache->nextXid = record->xl_xid;
TransactionIdAdvance(ShmemVariableCache->nextXid);
}
if (record->xl_info & XLR_BKP_BLOCK_MASK)
RestoreBkpBlocks(record, EndRecPtr);
RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
LastRec = ReadRecPtr;
record = ReadRecord(NULL, LOG, buffer);
} while (record != NULL);
} while (record != NULL && recoveryContinue);
/*
* end of main redo apply loop
*/
ereport(LOG,
(errmsg("redo done at %X/%X",
ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
LastRec = ReadRecPtr;
InRedo = false;
}
else
......@@ -2992,12 +3753,29 @@ StartupXLOG(void)
}
/*
* Init xlog buffer cache using the block containing the last valid
* record from the previous incarnation.
* Re-fetch the last valid or last applied record, so we can identify
* the exact endpoint of what we consider the valid portion of WAL.
*/
record = ReadRecord(&LastRec, PANIC, buffer);
EndOfLog = EndRecPtr;
XLByteToPrevSeg(EndOfLog, openLogId, openLogSeg);
XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg);
/*
* We are now done reading the old WAL. Turn off archive fetching
* if it was active, and make a writable copy of the last WAL segment.
* (Note that we also have a copy of the last block of the old WAL in
* readBuf; we will use that below.)
*/
if (InArchiveRecovery)
exitArchiveRecovery(endLogId, endLogSeg, EndOfLog.xrecoff);
/*
* Prepare to write WAL starting at EndOfLog position, and init xlog
* buffer cache using the block containing the last record from the
* previous incarnation.
*/
openLogId = endLogId;
openLogSeg = endLogSeg;
openLogFile = XLogFileOpen(openLogId, openLogSeg, false);
openLogOff = 0;
ControlFile->logId = openLogId;
......@@ -3707,6 +4485,7 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
}
#ifdef WAL_DEBUG
static void
xlog_outrec(char *buf, XLogRecord *record)
{
......@@ -3731,6 +4510,7 @@ xlog_outrec(char *buf, XLogRecord *record)
sprintf(buf + strlen(buf), ": %s",
RmgrTable[record->xl_rmid].rm_name);
}
#endif /* WAL_DEBUG */
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.120 2004/07/17 17:28:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.121 2004/07/19 02:47:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -5414,15 +5414,8 @@ copy_relation_data(Relation rel, SMgrRelation dst)
/*
* We need to log the copied data in WAL iff WAL archiving is enabled
* AND it's not a temp rel.
*
* XXX when WAL archiving is actually supported, this test will likely
* need to change; and the hardwired extern is cruddy anyway ...
*/
{
extern char XLOG_archive_dir[];
use_wal = XLOG_archive_dir[0] && !rel->rd_istemp;
}
use_wal = XLogArchivingActive() && !rel->rd_istemp;
nblocks = RelationGetNumberOfBlocks(rel);
for (blkno = 0; blkno < nblocks; blkno++)
......
......@@ -4,7 +4,7 @@
# Makefile for src/backend/postmaster
#
# IDENTIFICATION
# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.15 2004/05/29 22:48:19 tgl Exp $
# $PostgreSQL: pgsql/src/backend/postmaster/Makefile,v 1.16 2004/07/19 02:47:08 tgl Exp $
#
#-------------------------------------------------------------------------
......@@ -12,7 +12,7 @@ subdir = src/backend/postmaster
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = postmaster.o bgwriter.o pgstat.o
OBJS = postmaster.o bgwriter.o pgstat.o pgarch.o
all: SUBSYS.o
......
/*-------------------------------------------------------------------------
*
* pgarch.c
*
* PostgreSQL WAL archiver
*
* All functions relating to the archiver are included here:
*
* - All functions executed by the archiver process itself.
*
* - The archiver is forked from the postmaster, and the two
* processes then communicate using signals. The functions that
* the postmaster executes to start the archiver are in this file too.
*
* Initial author: Simon Riggs simon@2ndquadrant.com
*
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.1 2004/07/19 02:47:08 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <fcntl.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include "postmaster/pgarch.h"
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "utils/guc.h"
#include "utils/ps_status.h"
/* ----------
* Timer definitions.
* ----------
*/
#define PGARCH_AUTOWAKE_INTERVAL 60 /* How often to force a poll of
* the archive status directory;
* in seconds. */
#define PGARCH_RESTART_INTERVAL 60 /* How often to attempt to restart
* a failed archiver; in seconds. */
/* ----------
* Archiver control info.
*
* We expect that archivable files within pg_xlog will have names between
* MIN_XFN_CHARS and MAX_XFN_CHARS in length, consisting only of characters
* appearing in VALID_XFN_CHARS. The status files in archive_status have
* corresponding names with ".ready" or ".done" appended.
*
* pgarch_readyXlog uses these three macros to decide which entries of the
* archive_status directory are candidates for archiving.
* ----------
*/
#define MIN_XFN_CHARS 16
#define MAX_XFN_CHARS 16
#define VALID_XFN_CHARS "0123456789ABCDEF"
/*
* Number of failed attempts on a single file after which
* pgarch_ArchiverCopyLoop emits a WARNING and gives up for now.
*/
#define NUM_ARCHIVE_RETRIES 3
/* ----------
* Local data
* ----------
*/
/* "<DataDir>/pg_xlog"; filled in by PgArchiverMain before the main loop */
static char XLogDir[MAXPGPATH];
/* "<XLogDir>/archive_status"; filled in by PgArchiverMain as well */
static char XLogArchiveStatusDir[MAXPGPATH];
/* Time of the latest launch attempt; pgarch_start uses it to throttle respawns */
static time_t last_pgarch_start_time;
/*
* Flags set by interrupt handlers for later service in the main loop.
*
* got_SIGHUP is set by ArchSigHupHandler and makes pgarch_MainLoop re-read
* the configuration file. wakened is set by pgarch_waken (and by the main
* loop itself on entry and on timeout) and makes pgarch_MainLoop run the
* copy loop.
*/
static volatile sig_atomic_t got_SIGHUP = false;
static volatile sig_atomic_t wakened = false;
/* ----------
* Local function forward declarations
* ----------
*/
#ifdef EXEC_BACKEND
static pid_t pgarch_forkexec(void);
#endif
NON_EXEC_STATIC void PgArchiverMain(int argc, char *argv[]);
static void pgarch_exit(SIGNAL_ARGS);
static void ArchSigHupHandler(SIGNAL_ARGS);
static void pgarch_waken(SIGNAL_ARGS);
static void pgarch_MainLoop(void);
static void pgarch_ArchiverCopyLoop(void);
static bool pgarch_archiveXlog(char *xlog);
static bool pgarch_readyXlog(char *xlog);
static void pgarch_archiveDone(char *xlog);
/* ------------------------------------------------------------
* Public functions called from postmaster follow
* ------------------------------------------------------------
*/
/*
* pgarch_start
*
* Called from postmaster at startup or after an existing archiver
* died. Attempt to fire up a fresh archiver process.
*
* Nothing is started (and 0 is returned) when archiving is not active or
* when the previous launch attempt was made less than
* PGARCH_RESTART_INTERVAL seconds ago.
*
* Returns PID of child process, or 0 if fail.
*
* Note: if fail, we will be called again from the postmaster main loop.
*/
int
pgarch_start(void)
{
time_t curtime;
pid_t pgArchPid;
/*
* Do nothing if no archiver needed
*/
if (!XLogArchivingActive())
return 0;
/*
* Do nothing if too soon since last archiver start. This is a
* safety valve to protect against continuous respawn attempts if the
* archiver is dying immediately at launch. Note that since we will
* be re-called from the postmaster main loop, we will get another
* chance later.
*
* NOTE(review): the casts to unsigned int presumably make a negative
* difference (clock set backwards) look large, so that a restart is
* allowed rather than suppressed -- confirm.
*/
curtime = time(NULL);
if ((unsigned int) (curtime - last_pgarch_start_time) <
(unsigned int) PGARCH_RESTART_INTERVAL)
return 0;
/* Record this attempt whether or not the fork below succeeds */
last_pgarch_start_time = curtime;
/* Flush stdio buffers before forking (presumably so the child does not inherit pending output) */
fflush(stdout);
fflush(stderr);
#ifdef __BEOS__
/* Specific beos actions before backend startup */
beos_before_backend_startup();
#endif
/*
* With EXEC_BACKEND the child is created by pgarch_forkexec and there is
* no "case 0" branch in this function; otherwise we fork() directly and
* the child branch below runs the archiver.
*/
#ifdef EXEC_BACKEND
switch ((pgArchPid = pgarch_forkexec()))
#else
switch ((pgArchPid = fork()))
#endif
{
case -1:
#ifdef __BEOS__
/* Specific beos actions */
beos_backend_startup_failed();
#endif
/* Fork failed: log it and report "no archiver" to the caller */
ereport(LOG,
(errmsg("could not fork archiver: %m")));
return 0;
#ifndef EXEC_BACKEND
case 0:
/* in postmaster child ... */
#ifdef __BEOS__
/* Specific beos actions after backend startup */
beos_backend_startup();
#endif
/* Close the postmaster's sockets */
ClosePostmasterPorts();
/* Drop our connection to postmaster's shared memory, as well */
PGSharedMemoryDetach();
/* PgArchiverMain ends by calling exit(0), so the break is not normally reached */
PgArchiverMain(0, NULL);
break;
#endif
default:
/* In the parent: hand the child's PID back to the postmaster */
return (int) pgArchPid;
}
/* shouldn't get here */
return 0;
}
/* ------------------------------------------------------------
* Local functions called by archiver follow
* ------------------------------------------------------------
*/
#ifdef EXEC_BACKEND
/*
 * pgarch_forkexec() -
 *
 * Assemble the argument vector for the archiver subprocess and pass it to
 * postmaster_forkexec(), returning that function's result to the caller.
 */
static pid_t
pgarch_forkexec(void)
{
	char	   *argvec[10];
	int			nargs;

	argvec[0] = "postgres";
	argvec[1] = "-forkarch";
	argvec[2] = NULL;			/* filled in by postmaster_forkexec */
	nargs = 3;

	/* terminating entry; it is not counted in nargs */
	argvec[nargs] = NULL;

	Assert(nargs < lengthof(argvec));

	return postmaster_forkexec(nargs, argvec);
}
#endif /* EXEC_BACKEND */
/*
* PgArchiverMain
*
* Entry point of the archiver process: marks the process as a postmaster
* child, installs the archiver's signal handlers, sets the ps display,
* computes the pg_xlog and archive_status paths, and then runs
* pgarch_MainLoop. Does not return; the process exits with status 0 once
* the main loop returns.
*
* The argc/argv parameters are valid only in EXEC_BACKEND case. However,
* since we don't use 'em, it hardly matters...
*/
NON_EXEC_STATIC void
PgArchiverMain(int argc, char *argv[])
{
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
MyProcPid = getpid(); /* reset MyProcPid */
/* Lose the postmaster's on-exit routines */
on_exit_reset();
/*
* Ignore all signals usually bound to some action in the postmaster,
* except for SIGHUP, SIGUSR1 and SIGQUIT.
*
* SIGHUP requests a config re-read, SIGUSR1 requests an archiving pass,
* and SIGQUIT makes the process exit immediately (see the respective
* handlers).
*/
pqsignal(SIGHUP, ArchSigHupHandler);
pqsignal(SIGINT, SIG_IGN);
pqsignal(SIGTERM, SIG_IGN);
pqsignal(SIGQUIT, pgarch_exit);
pqsignal(SIGALRM, SIG_IGN);
pqsignal(SIGPIPE, SIG_IGN);
pqsignal(SIGUSR1, pgarch_waken);
pqsignal(SIGUSR2, SIG_IGN);
pqsignal(SIGCHLD, SIG_DFL);
pqsignal(SIGTTIN, SIG_DFL);
pqsignal(SIGTTOU, SIG_DFL);
pqsignal(SIGCONT, SIG_DFL);
pqsignal(SIGWINCH, SIG_DFL);
/* Handlers are in place; presumably this unblocks signal delivery -- confirm */
PG_SETMASK(&UnBlockSig);
/*
* Identify myself via ps
*/
init_ps_display("archiver process", "", "");
set_ps_display("");
/* Init XLOG file paths */
snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir);
snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);
/*
* Runs until archiving is switched off via a config reload or
* PostmasterIsAlive() reports false.
*/
pgarch_MainLoop();
exit(0);
}
/*
* SIGQUIT signal handler for archiver process
*
* Installed by PgArchiverMain. Terminates the archiver at once with exit
* status 0, without waiting for any archive command that may be running.
*/
static void
pgarch_exit(SIGNAL_ARGS)
{
/*
* For now, we just nail the doors shut and get out of town. It might
* seem cleaner to finish up any pending archive copies, but there's
* a nontrivial risk that init will kill us partway through.
*/
exit(0);
}
/*
* SIGHUP: set flag to re-read config file at next convenient time
*
* The handler only records the request; pgarch_MainLoop notices the flag,
* clears it and calls ProcessConfigFile.
*/
static void
ArchSigHupHandler(SIGNAL_ARGS)
{
got_SIGHUP = true;
}
/*
* SIGUSR1 signal handler for archiver process
*
* Records a wakeup request; pgarch_MainLoop reacts to the flag by running
* pgarch_ArchiverCopyLoop.
*/
static void
pgarch_waken(SIGNAL_ARGS)
{
wakened = true;
}
/*
 * pgarch_MainLoop
 *
 * Service loop of the archiver process.  Each cycle handles a pending
 * config reload, runs the copy loop if a wakeup was requested, and then
 * sleeps.  Returns when archiving has been disabled by a config reload or
 * when PostmasterIsAlive() reports false.
 */
static void
pgarch_MainLoop(void)
{
	time_t		prev_copy_time = 0;

	/*
	 * Force one archiving pass right away: files may be left over from a
	 * previous database run, or a previous archiver may have died
	 * unexpectedly.  Later passes are triggered by a signal or a timeout.
	 */
	wakened = true;

	for (;;)
	{
		/* Re-read the configuration if we were asked to */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			if (!XLogArchivingActive())
				return;			/* user wants us to shut down */
		}

		/* Archive whatever is ready */
		if (wakened)
		{
			wakened = false;
			pgarch_ArchiverCopyLoop();
			prev_copy_time = time(NULL);
		}

		/*
		 * Strictly speaking we would only need to wait for a signal here,
		 * but the archiver exists to protect data, so it also wakes up by
		 * itself now and then.  That periodic wakeup also covers the case
		 * of a signal arriving just before sleep() is entered.
		 */
		if (!wakened)
		{
			time_t		now;

			sleep(PGARCH_AUTOWAKE_INTERVAL);
			now = time(NULL);
			if ((unsigned int) (now - prev_copy_time) >=
				(unsigned int) PGARCH_AUTOWAKE_INTERVAL)
				wakened = true;
		}

		if (!PostmasterIsAlive(true))
			return;
	}
}
/*
 * pgarch_ArchiverCopyLoop
 *
 * Archive every xlog file that currently has a .ready status file, oldest
 * first, then return.  Returns early, after a WARNING, if one file fails
 * NUM_ARCHIVE_RETRIES times in a row.
 */
static void
pgarch_ArchiverCopyLoop(void)
{
	char		segname[MAX_XFN_CHARS + 1];

	/*
	 * Usually there is just one file waiting, but backends may add more
	 * to the list while earlier ones are still being copied, so keep
	 * asking until nothing is ready any more.
	 */
	while (pgarch_readyXlog(segname))
	{
		int			attempts = 0;

		/* retry the same file until it succeeds or we run out of tries */
		while (!pgarch_archiveXlog(segname))
		{
			attempts++;
			if (attempts >= NUM_ARCHIVE_RETRIES)
			{
				ereport(WARNING,
						(errmsg("transaction log file \"%s\" could not be archived",
								segname)));
				return;			/* give up archiving for now */
			}
			sleep(1);			/* wait a bit before retrying */
		}

		/* successful: flip the status file from .ready to .done */
		pgarch_archiveDone(segname);
	}
}
/*
 * pgarch_archiveXlog
 *
 * Invokes system(3) to copy one archive file to wherever it should go.
 *
 * The command is built from XLogArchiveCommand with these substitutions:
 *		%p	full path of the file to archive (XLogDir/xlog)
 *		%f	file name only (xlog)
 *		%%	a single '%'
 * Any other character following '%' leaves the '%' as an ordinary
 * character.
 *
 * If the expanded command does not fit into MAXPGPATH bytes it is NOT
 * executed: running a silently truncated shell command could copy the
 * file to the wrong place and still report success, after which the
 * segment would be marked as archived.  That case is reported as a
 * failure instead.
 *
 * xlog: name of the xlog file, without directory.
 *
 * Returns true if successful, i.e. system() returned zero.
 */
static bool
pgarch_archiveXlog(char *xlog)
{
	char		xlogarchcmd[MAXPGPATH];
	char		pathname[MAXPGPATH];
	char	   *dp;
	char	   *endp;
	const char *sp;
	bool		overflow = false;
	int			rc;

	snprintf(pathname, MAXPGPATH, "%s/%s", XLogDir, xlog);

	/*
	 * construct the command to be executed
	 *
	 * endp addresses the last byte of the buffer, which is reserved for
	 * the terminating NUL.
	 */
	dp = xlogarchcmd;
	endp = xlogarchcmd + MAXPGPATH - 1;
	*endp = '\0';

	for (sp = XLogArchiveCommand; *sp; sp++)
	{
		const char *subst = NULL;	/* string to insert, if any */

		if (*sp == '%')
		{
			switch (sp[1])
			{
				case 'p':
					/* %p: full path of source file */
					sp++;
					subst = pathname;
					break;
				case 'f':
					/* %f: filename of source file */
					sp++;
					subst = xlog;
					break;
				case '%':
					/* convert %% to a single % */
					sp++;
					break;
				default:
					/* otherwise treat the % as not special */
					break;
			}
		}

		if (subst != NULL)
		{
			size_t		room = (size_t) (endp - dp);
			size_t		need = strlen(subst);

			/* StrNCpy stores at most room-1 characters plus a NUL */
			if (need > 0 && need >= room)
				overflow = true;
			StrNCpy(dp, subst, endp - dp);
			dp += strlen(dp);
		}
		else if (dp < endp)
			*dp++ = *sp;
		else
			overflow = true;
	}
	*dp = '\0';

	if (overflow)
	{
		/* never hand a clipped command line to the shell */
		ereport(LOG,
				(errmsg("archive command \"%s\" failed: return code %d",
						xlogarchcmd, -1)));
		return false;
	}

	ereport(DEBUG3,
			(errmsg_internal("executing archive command \"%s\"",
							 xlogarchcmd)));
	rc = system(xlogarchcmd);
	if (rc != 0)
	{
		ereport(LOG,
				(errmsg("archive command \"%s\" failed: return code %d",
						xlogarchcmd, rc)));
		return false;
	}
	ereport(LOG,
			(errmsg("archived transaction log file \"%s\"", xlog)));

	return true;
}
/*
 * pgarch_readyXlog
 *
 * Find the oldest xlog file that still awaits archiving and store its
 * name (without the ".ready" suffix) in *xlog.  Returns true if such a
 * file exists, false if nothing is ready; *xlog is untouched in the
 * latter case.
 *
 * Nothing marks the file as "being archived", so this would need to be
 * extended if several archival tasks ever ran concurrently.  If a failure
 * occurs, the file is simply copied again from scratch at the next
 * opportunity.
 *
 * Returning the oldest file matters for two reasons:
 *	1) recovery needs an unbroken, sequential chain of xlogs
 *	2) the oldest files are the first to become candidates for
 *	   recycling at checkpoint time
 */
static bool
pgarch_readyXlog(char *xlog)
{
	char		oldest[MAX_XFN_CHARS + 6 + 1];
	DIR		   *statusdir;
	struct dirent *entry;
	bool		found = false;

	/*
	 * Scan the status directory for *.ready entries, remembering the one
	 * that sorts first.  This could be optimised, but on the vast majority
	 * of calls only a single file is expected.
	 */
	statusdir = AllocateDir(XLogArchiveStatusDir);
	if (statusdir == NULL)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not open archive status directory \"%s\": %m",
						XLogArchiveStatusDir)));

	/* errno is cleared before every readdir() so a failure can be told from EOF */
	for (errno = 0; (entry = readdir(statusdir)) != NULL; errno = 0)
	{
		const char *fname = entry->d_name;
		int			stemlen = (int) strlen(fname) - 6;

		/* the part before the suffix must have a plausible length ... */
		if (stemlen < MIN_XFN_CHARS || stemlen > MAX_XFN_CHARS)
			continue;
		/* ... consist only of valid xlog file name characters ... */
		if (strspn(fname, VALID_XFN_CHARS) < stemlen)
			continue;
		/* ... and be followed by exactly ".ready" */
		if (strcmp(fname + stemlen, ".ready") != 0)
			continue;

		if (!found || strcmp(fname, oldest) < 0)
			strcpy(oldest, fname);
		found = true;
	}
#ifdef WIN32
	/* This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
	   not in released version */
	if (GetLastError() == ERROR_NO_MORE_FILES)
		errno = 0;
#endif
	if (errno)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not read archive status directory \"%s\": %m",
						XLogArchiveStatusDir)));
	FreeDir(statusdir);

	if (!found)
		return false;

	/* truncate off the .ready */
	oldest[strlen(oldest) - 6] = '\0';
	strcpy(xlog, oldest);
	return true;
}
/*
 * pgarch_archiveDone
 *
 * Record that an xlog file has been archived successfully by renaming its
 * status file from NNN.ready to NNN.done.  Eventually, a checkpoint
 * process will notice this and delete both the NNN.done file and the xlog
 * file itself.
 *
 * A failed rename is reported as a WARNING only.
 */
static void
pgarch_archiveDone(char *xlog)
{
	char		readypath[MAXPGPATH];
	char		donepath[MAXPGPATH];

	snprintf(donepath, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog);
	snprintf(readypath, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog);

	if (rename(readypath, donepath) < 0)
		ereport(WARNING,
				(errcode_for_file_access(),
				 errmsg("could not rename \"%s\": %m",
						readypath)));
}
......@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.411 2004/07/12 19:14:56 momjian Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.412 2004/07/19 02:47:08 tgl Exp $
*
* NOTES
*
......@@ -104,6 +104,7 @@
#include "miscadmin.h"
#include "nodes/nodes.h"
#include "postmaster/postmaster.h"
#include "postmaster/pgarch.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/pg_shmem.h"
......@@ -198,6 +199,7 @@ char *preload_libraries_string = NULL;
/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
BgWriterPID = 0,
PgArchPID = 0,
PgStatPID = 0;
/* Startup/shutdown state */
......@@ -826,7 +828,8 @@ PostmasterMain(int argc, char *argv[])
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
* bootstrap/bootstrap.c, postmaster/bgwriter.c, and postmaster/pgstat.c.
* bootstrap/bootstrap.c, postmaster/bgwriter.c, postmaster/pgarch.c,
* and postmaster/pgstat.c.
*/
pqinitmask();
PG_SETMASK(&BlockSig);
......@@ -1217,6 +1220,11 @@ ServerLoop(void)
kill(BgWriterPID, SIGUSR2);
}
/* If we have lost the archiver, try to start a new one */
if (XLogArchivingActive() && PgArchPID == 0 &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgArchPID = pgarch_start();
/* If we have lost the stats collector, try to start a new one */
if (PgStatPID == 0 &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
......@@ -1760,6 +1768,8 @@ SIGHUP_handler(SIGNAL_ARGS)
SignalChildren(SIGHUP);
if (BgWriterPID != 0)
kill(BgWriterPID, SIGHUP);
if (PgArchPID != 0)
kill(PgArchPID, SIGHUP);
/* PgStatPID does not currently need SIGHUP */
load_hba();
load_ident();
......@@ -1818,6 +1828,9 @@ pmdie(SIGNAL_ARGS)
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
......@@ -1862,6 +1875,9 @@ pmdie(SIGNAL_ARGS)
/* And tell it to shut down */
if (BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
......@@ -1880,6 +1896,8 @@ pmdie(SIGNAL_ARGS)
kill(StartupPID, SIGQUIT);
if (BgWriterPID != 0)
kill(BgWriterPID, SIGQUIT);
if (PgArchPID != 0)
kill(PgArchPID, SIGQUIT);
if (PgStatPID != 0)
kill(PgStatPID, SIGQUIT);
if (DLGetHead(BackendList))
......@@ -1967,12 +1985,16 @@ reaper(SIGNAL_ARGS)
/*
* Go to shutdown mode if a shutdown request was pending.
* Otherwise, try to start the stats collector too.
* Otherwise, try to start the archiver and stats collector too.
*/
if (Shutdown > NoShutdown && BgWriterPID != 0)
kill(BgWriterPID, SIGUSR2);
else if (PgStatPID == 0 && Shutdown == NoShutdown)
PgStatPID = pgstat_start();
else if (Shutdown == NoShutdown) {
if (XLogArchivingActive() && PgArchPID == 0)
PgArchPID = pgarch_start();
if (PgStatPID == 0)
PgStatPID = pgstat_start();
}
continue;
}
......@@ -2004,6 +2026,23 @@ reaper(SIGNAL_ARGS)
continue;
}
/*
* Was it the archiver? If so, just try to start a new
* one; no need to force reset of the rest of the system. (If fail,
* we'll try again in future cycles of the main loop.)
*/
if (PgArchPID != 0 && pid == PgArchPID)
{
PgArchPID = 0;
if (exitstatus != 0)
LogChildExit(LOG, gettext("archiver process"),
pid, exitstatus);
if (XLogArchivingActive() &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgArchPID = pgarch_start();
continue;
}
/*
* Was it the statistics collector? If so, just try to start a new
* one; no need to force reset of the rest of the system. (If fail,
......@@ -2029,8 +2068,9 @@ reaper(SIGNAL_ARGS)
if (FatalError)
{
/*
* Wait for all children exit, then reset shmem and
* StartupDataBase.
* Wait for all important children to exit, then reset shmem and
* StartupDataBase. (We can ignore the archiver and stats processes
* here since they are not connected to shmem.)
*/
if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
goto reaper_done;
......@@ -2191,6 +2231,17 @@ HandleChildCrash(int pid,
kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
/* Force a power-cycle of the pgarch process too */
/* (Shouldn't be necessary, but just for luck) */
if (PgArchPID != 0 && !FatalError)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
"SIGQUIT",
(int) PgArchPID)));
kill(PgArchPID, SIGQUIT);
}
/* Force a power-cycle of the pgstat processes too */
/* (Shouldn't be necessary, but just for luck) */
if (PgStatPID != 0 && !FatalError)
......@@ -2873,6 +2924,16 @@ SubPostmasterMain(int argc, char *argv[])
BootstrapMain(argc - 2, argv + 2);
proc_exit(0);
}
if (strcmp(argv[1], "-forkarch") == 0)
{
/* Close the postmaster's sockets */
ClosePostmasterPorts();
/* Do not want to attach to shared memory */
PgArchiverMain(argc, argv);
proc_exit(0);
}
if (strcmp(argv[1], "-forkbuf") == 0)
{
/* Close the postmaster's sockets */
......@@ -2951,6 +3012,18 @@ sigusr1_handler(SIGNAL_ARGS)
if (Shutdown <= SmartShutdown)
SignalChildren(SIGUSR1);
}
if (PgArchPID != 0 && Shutdown == NoShutdown)
{
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER))
{
/*
* Send SIGUSR1 to archiver process, to wake it up and begin
* archiving next transaction log file.
*/
kill(PgArchPID, SIGUSR1);
}
}
PG_SETMASK(&UnBlockSig);
......
......@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.219 2004/07/12 02:22:51 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.220 2004/07/19 02:47:10 tgl Exp $
*
*--------------------------------------------------------------------
*/
......@@ -1371,6 +1371,15 @@ static struct config_real ConfigureNamesReal[] =
static struct config_string ConfigureNamesString[] =
{
{
{"archive_command", PGC_SIGHUP, WAL_SETTINGS,
gettext_noop("WAL archiving command."),
gettext_noop("The shell command that will be called to archive a WAL file.")
},
&XLogArchiveCommand,
"", NULL, NULL
},
{
{"client_encoding", PGC_USERSET, CLIENT_CONN_LOCALE,
gettext_noop("Sets the client's character set encoding."),
......
......@@ -111,6 +111,16 @@
#commit_delay = 0 # range 0-100000, in microseconds
#commit_siblings = 5 # range 1-1000
# - Archiving -
#archive_command = '' # command to use to archive a logfile segment
# If archive_command is '' then archiving is disabled. Otherwise, set it
# to a command to copy a file to the proper place. A simplistic example
# is 'cp %p /mnt/server/archivedir/%f'. Any %p in the string is replaced
# by the absolute path of the file to archive, while any %f is replaced by
# the file name only. NOTE: it is important for the command to return
# zero exit status if and only if it succeeded.
#---------------------------------------------------------------------------
# QUERY TUNING
......
......@@ -39,7 +39,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
* Portions taken from FreeBSD.
*
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.43 2004/07/14 17:55:10 petere Exp $
* $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.44 2004/07/19 02:47:12 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -145,7 +145,7 @@ static char *get_id(void);
static char *get_encoding_id(char *encoding_name);
static char *get_short_version(void);
static int check_data_dir(void);
static bool mkdatadir(char *subdir);
static bool mkdatadir(const char *subdir);
static void set_input(char **dest, char *filename);
static void check_input(char *path);
static void set_short_version(char *short_version, char *extrapath);
......@@ -900,7 +900,7 @@ check_data_dir(void)
* make the data directory (or one of its subdirectories if subdir is not NULL)
*/
static bool
mkdatadir(char *subdir)
mkdatadir(const char *subdir)
{
char *path;
......@@ -2022,8 +2022,16 @@ main(int argc, char *argv[])
char *short_version;
char *pgdenv; /* PGDATA value got from sent to
* environment */
char *subdirs[] =
{"global", "pg_xlog", "pg_clog", "pg_subtrans", "base", "base/1", "pg_tblspc"};
static const char *subdirs[] = {
"global",
"pg_xlog",
"pg_xlog/archive_status",
"pg_clog",
"pg_subtrans",
"base",
"base/1",
"pg_tblspc"
};
progname = get_progname(argv[0]);
set_pglocale_pgservice(argv[0], "initdb");
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.52 2004/07/01 00:51:38 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.53 2004/07/19 02:47:13 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
......@@ -208,9 +208,12 @@ extern XLogRecPtr ProcLastRecEnd;
/* these variables are GUC parameters related to XLOG */
extern int CheckPointSegments;
extern int XLOGbuffers;
extern char *XLogArchiveCommand;
extern char *XLOG_sync_method;
extern const char XLOG_sync_method_default[];
#define XLogArchivingActive() (XLogArchiveCommand[0] != '\0')
#ifdef WAL_DEBUG
extern bool XLOG_DEBUG;
#endif
......
/*-------------------------------------------------------------------------
*
* pgarch.h
* Exports from postmaster/pgarch.c.
*
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/postmaster/pgarch.h,v 1.1 2004/07/19 02:47:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef _PGARCH_H
#define _PGARCH_H
/* ----------
* Functions called from postmaster
* ----------
*/
/* Launch the archiver process; returns the child's PID, or 0 if none was started */
extern int pgarch_start(void);
/*
* Archiver entry point. It is exported only in EXEC_BACKEND builds;
* NOTE(review): pgarch.c defines it as NON_EXEC_STATIC, which presumably
* makes it file-local otherwise -- confirm.
*/
#ifdef EXEC_BACKEND
extern void PgArchiverMain(int argc, char *argv[]);
#endif
#endif /* _PGARCH_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.8 2004/05/29 22:48:23 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.9 2004/07/19 02:47:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -24,6 +24,7 @@ typedef enum
{
PMSIGNAL_PASSWORD_CHANGE, /* pg_pwd file has changed */
PMSIGNAL_WAKEN_CHILDREN, /* send a SIGUSR1 signal to all backends */
PMSIGNAL_WAKEN_ARCHIVER, /* send a NOTIFY signal to xlog archiver */
NUM_PMSIGNALS /* Must be last value of enum! */
} PMSignalReason;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment