Commit 3dba9cb6 authored by Tom Lane

Add a check on file size as an additional safety check that a WAL file
recovered from archive is not corrupt.  It's not much but it will catch
one common problem, viz out-of-disk-space.
Also, force a WAL recovery scan when recovery.conf is present, even if
pg_control shows a clean shutdown.  This allows recovery with a tar backup
that was taken with the postmaster shut down, as per complaint from
Mark Kirkwood.
parent 406e8aa2
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.150 2004/07/21 22:31:20 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.151 2004/07/22 20:18:40 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -432,7 +432,7 @@ static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, ...@@ -432,7 +432,7 @@ static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
static int XLogFileOpen(uint32 log, uint32 seg); static int XLogFileOpen(uint32 log, uint32 seg);
static int XLogFileRead(uint32 log, uint32 seg, int emode); static int XLogFileRead(uint32 log, uint32 seg, int emode);
static bool RestoreArchivedFile(char *path, const char *xlogfname, static bool RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername); const char *recovername, off_t expectedSize);
static void PreallocXlogFiles(XLogRecPtr endptr); static void PreallocXlogFiles(XLogRecPtr endptr);
static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr); static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
...@@ -1838,7 +1838,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode) ...@@ -1838,7 +1838,8 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
{ {
XLogFileName(xlogfname, tli, log, seg); XLogFileName(xlogfname, tli, log, seg);
restoredFromArchive = RestoreArchivedFile(path, xlogfname, restoredFromArchive = RestoreArchivedFile(path, xlogfname,
"RECOVERYXLOG"); "RECOVERYXLOG",
XLogSegSize);
} }
else else
XLogFilePath(path, tli, log, seg); XLogFilePath(path, tli, log, seg);
...@@ -1876,10 +1877,14 @@ XLogFileRead(uint32 log, uint32 seg, int emode) ...@@ -1876,10 +1877,14 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
* If not successful, fill "path" with the name of the normal on-line file * If not successful, fill "path" with the name of the normal on-line file
* (which may or may not actually exist, but we'll try to use it), and return * (which may or may not actually exist, but we'll try to use it), and return
* FALSE. * FALSE.
*
* For fixed-size files, the caller may pass the expected size as an
* additional crosscheck on successful recovery. If the file size is not
* known, set expectedSize = 0.
*/ */
static bool static bool
RestoreArchivedFile(char *path, const char *xlogfname, RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername) const char *recovername, off_t expectedSize)
{ {
char xlogpath[MAXPGPATH]; char xlogpath[MAXPGPATH];
char xlogRestoreCmd[MAXPGPATH]; char xlogRestoreCmd[MAXPGPATH];
...@@ -1991,20 +1996,43 @@ RestoreArchivedFile(char *path, const char *xlogfname, ...@@ -1991,20 +1996,43 @@ RestoreArchivedFile(char *path, const char *xlogfname,
rc = system(xlogRestoreCmd); rc = system(xlogRestoreCmd);
if (rc == 0) if (rc == 0)
{ {
/* restore success ... assuming file is really there now ... */ /*
if (stat(xlogpath, &stat_buf) == 0) { * command apparently succeeded, but let's make sure the file is
* really there now and has the correct size.
*
* XXX I made wrong-size a fatal error to ensure the DBA would
* notice it, but is that too strong? We could try to plow ahead
* with a local copy of the file ... but the problem is that there
* probably isn't one, and we'd incorrectly conclude we've reached
* the end of WAL and we're done recovering ...
*/
if (stat(xlogpath, &stat_buf) == 0)
{
if (expectedSize > 0 && stat_buf.st_size != expectedSize)
ereport(FATAL,
(errmsg("archive file \"%s\" has wrong size: %lu instead of %lu",
xlogfname,
(unsigned long) stat_buf.st_size,
(unsigned long) expectedSize)));
else
{
ereport(LOG, ereport(LOG,
(errmsg("restored log file \"%s\" from archive", (errmsg("restored log file \"%s\" from archive",
xlogfname))); xlogfname)));
strcpy(path, xlogpath); strcpy(path, xlogpath);
return true; return true;
} }
}
else
{
/* stat failed */
if (errno != ENOENT) if (errno != ENOENT)
ereport(FATAL, ereport(FATAL,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not stat \"%s\": %m", errmsg("could not stat \"%s\": %m",
xlogpath))); xlogpath)));
} }
}
/* /*
* remember, we rollforward UNTIL the restore fails * remember, we rollforward UNTIL the restore fails
...@@ -2664,7 +2692,7 @@ readTimeLineHistory(TimeLineID targetTLI) ...@@ -2664,7 +2692,7 @@ readTimeLineHistory(TimeLineID targetTLI)
if (InArchiveRecovery) if (InArchiveRecovery)
{ {
TLHistoryFileName(histfname, targetTLI); TLHistoryFileName(histfname, targetTLI);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY"); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
} }
else else
TLHistoryFilePath(path, targetTLI); TLHistoryFilePath(path, targetTLI);
...@@ -2749,7 +2777,7 @@ existsTimeLineHistory(TimeLineID probeTLI) ...@@ -2749,7 +2777,7 @@ existsTimeLineHistory(TimeLineID probeTLI)
if (InArchiveRecovery) if (InArchiveRecovery)
{ {
TLHistoryFileName(histfname, probeTLI); TLHistoryFileName(histfname, probeTLI);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY"); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
} }
else else
TLHistoryFilePath(path, probeTLI); TLHistoryFilePath(path, probeTLI);
...@@ -2853,7 +2881,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, ...@@ -2853,7 +2881,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
if (InArchiveRecovery) if (InArchiveRecovery)
{ {
TLHistoryFileName(histfname, parentTLI); TLHistoryFileName(histfname, parentTLI);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY"); RestoreArchivedFile(path, histfname, "RECOVERYHISTORY", 0);
} }
else else
TLHistoryFilePath(path, parentTLI); TLHistoryFilePath(path, parentTLI);
...@@ -4042,6 +4070,11 @@ StartupXLOG(void) ...@@ -4042,6 +4070,11 @@ StartupXLOG(void)
if (checkPoint.undo.xrecoff == 0) if (checkPoint.undo.xrecoff == 0)
checkPoint.undo = RecPtr; checkPoint.undo = RecPtr;
/*
* Check whether we need to force recovery from WAL. If it appears
* to have been a clean shutdown and we did not have a recovery.conf
* file, then assume no recovery needed.
*/
if (XLByteLT(checkPoint.undo, RecPtr) || if (XLByteLT(checkPoint.undo, RecPtr) ||
XLByteLT(checkPoint.redo, RecPtr)) XLByteLT(checkPoint.redo, RecPtr))
{ {
...@@ -4054,13 +4087,23 @@ StartupXLOG(void) ...@@ -4054,13 +4087,23 @@ StartupXLOG(void)
InRecovery = true; InRecovery = true;
/* REDO */ /* REDO */
if (InRecovery) if (InRecovery || InArchiveRecovery)
{ {
int rmid; int rmid;
if (InRecovery)
{
ereport(LOG, ereport(LOG,
(errmsg("database system was not properly shut down; " (errmsg("database system was not properly shut down; "
"automatic recovery in progress"))); "automatic recovery in progress")));
}
else
{
/* force recovery due to presence of recovery.conf */
InRecovery = true;
ereport(LOG,
(errmsg("automatic recovery in progress")));
}
ControlFile->state = DB_IN_RECOVERY; ControlFile->state = DB_IN_RECOVERY;
ControlFile->time = time(NULL); ControlFile->time = time(NULL);
UpdateControlFile(); UpdateControlFile();
...@@ -4158,9 +4201,12 @@ StartupXLOG(void) ...@@ -4158,9 +4201,12 @@ StartupXLOG(void)
InRedo = false; InRedo = false;
} }
else else
{
/* there are no WAL records following the checkpoint */
ereport(LOG, ereport(LOG,
(errmsg("redo is not required"))); (errmsg("redo is not required")));
} }
}
/* /*
* Re-fetch the last valid or last applied record, so we can identify * Re-fetch the last valid or last applied record, so we can identify
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment