Commit de768844 authored by Heikki Linnakangas

At promotion, archive last segment from old timeline with .partial suffix.

Previously, we would archive the possibly-incomplete WAL segment with its
normal filename, but that causes trouble if the server owning that timeline
is still running, and tries to archive the same segment later. It's not nice
for the standby to trip up the master's archival like that. And it's pretty
confusing, anyway, to have an incomplete segment in the archive that's
indistinguishable from a normal, complete segment.

To avoid such confusion, add a .partial suffix to the file. Or to be more
precise, make a copy of the old segment under the .partial suffix, and
archive that instead of the original file. pg_receivexlog also uses the
.partial suffix for the same purpose, to tell apart incompletely streamed
files from complete ones.

There is no automatic mechanism to use the .partial files at recovery, so
they will go unused, unless the administrator manually copies them to
the pg_xlog directory (and removes the .partial suffix). Recovery won't
normally need the WAL - when recovering to the new timeline, it will find
the same WAL on the first segment on the new timeline instead - but it
nevertheless feels better to archive the file with the .partial suffix, for
debugging purposes if nothing else.
parent 179cdd09
...@@ -3020,24 +3020,22 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) ...@@ -3020,24 +3020,22 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
} }
/* /*
* Create a new XLOG file segment by copying a pre-existing one. * Copy a WAL segment file in pg_xlog directory.
* *
* destsegno: identify segment to be created. * dstfname destination filename
* srcfname source filename
* upto how much of the source file to copy? (the rest is filled with
* zeros)
* *
* srcTLI, srclog, srcseg: identify segment to be copied (could be from * If dstfname is not given, the file is created with a temporary filename,
* a different timeline) * which is returned. Both filenames are relative to the pg_xlog directory.
* *
* upto: how much of the source file to copy? (the rest is filled with zeros) * NB: Any existing file with the same name will be overwritten!
*
* Currently this is only used during recovery, and so there are no locking
* considerations. But we should be just as tense as XLogFileInit to avoid
* emplacing a bogus file.
*/ */
static void static char *
XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, XLogFileCopy(char *dstfname, char *srcfname, int upto)
int upto)
{ {
char path[MAXPGPATH]; char srcpath[MAXPGPATH];
char tmppath[MAXPGPATH]; char tmppath[MAXPGPATH];
char buffer[XLOG_BLCKSZ]; char buffer[XLOG_BLCKSZ];
int srcfd; int srcfd;
...@@ -3047,12 +3045,12 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, ...@@ -3047,12 +3045,12 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
/* /*
* Open the source file * Open the source file
*/ */
XLogFilePath(path, srcTLI, srcsegno); snprintf(srcpath, MAXPGPATH, XLOGDIR "/%s", srcfname);
srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY, 0); srcfd = OpenTransientFile(srcpath, O_RDONLY | PG_BINARY, 0);
if (srcfd < 0) if (srcfd < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open file \"%s\": %m", path))); errmsg("could not open file \"%s\": %m", srcpath)));
/* /*
* Copy into a temp file name. * Copy into a temp file name.
...@@ -3094,10 +3092,12 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, ...@@ -3094,10 +3092,12 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
if (errno != 0) if (errno != 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", path))); errmsg("could not read file \"%s\": %m",
srcpath)));
else else
ereport(ERROR, ereport(ERROR,
(errmsg("not enough data in file \"%s\"", path))); (errmsg("not enough data in file \"%s\"",
srcpath)));
} }
} }
errno = 0; errno = 0;
...@@ -3131,10 +3131,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, ...@@ -3131,10 +3131,24 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
CloseTransientFile(srcfd); CloseTransientFile(srcfd);
/* /*
* Now move the segment into place with its final name. * Now move the segment into place with its final name. (Or just return
* the path to the file we created, if the caller wants to handle the
* rest on its own.)
*/ */
if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, false)) if (dstfname)
elog(ERROR, "InstallXLogFileSegment should not have failed"); {
char dstpath[MAXPGPATH];
snprintf(dstpath, MAXPGPATH, XLOGDIR "/%s", dstfname);
if (rename(tmppath, dstpath) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
tmppath, dstpath)));
return NULL;
}
else
return pstrdup(tmppath);
} }
/* /*
...@@ -3577,7 +3591,8 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr) ...@@ -3577,7 +3591,8 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL) while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
{ {
/* Ignore files that are not XLOG segments */ /* Ignore files that are not XLOG segments */
if (!IsXLogFileName(xlde->d_name)) if (!IsXLogFileName(xlde->d_name) &&
!IsPartialXLogFileName(xlde->d_name))
continue; continue;
/* /*
...@@ -5189,25 +5204,79 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog) ...@@ -5189,25 +5204,79 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
* of the old timeline up to the switch point, to the starting WAL segment * of the old timeline up to the switch point, to the starting WAL segment
* on the new timeline. * on the new timeline.
* *
* Notify the archiver that the last WAL segment of the old timeline is * What to do with the partial segment on the old timeline? If we don't
* ready to copy to archival storage if its .done file doesn't exist * archive it, and the server that created the WAL never archives it
* (e.g., if it's the restored WAL file, it's expected to have .done file). * either (e.g. because it was hit by a meteor), it will never make it to
* Otherwise, it is not archived for a while. * the archive. That's OK from our point of view, because the new segment
* that we created with the new TLI contains all the WAL from the old
* timeline up to the switch point. But if you later try to do PITR to the
* "missing" WAL on the old timeline, recovery won't find it in the
* archive. It's physically present in the new file with new TLI, but
* recovery won't look there when it's recovering to the older timeline.
* On the other hand, if we archive the partial segment, and the original
* server on that timeline is still running and archives the completed
* version of the same segment later, it will fail. (We used to do that in
* 9.4 and below, and it caused such problems).
*
* As a compromise, we archive the last segment with the .partial suffix.
* Archive recovery will never try to read .partial segments, so they will
* normally go unused. But in the odd PITR case, the administrator can
* copy them manually to the pg_xlog directory (removing the suffix). They
* can be useful in debugging, too.
*
* If a .done file already exists for the old timeline, however, there is
* already a complete copy of the file in the archive, and there is no
* need to archive the partial one. (In particular, if it was restored
* from the archive to begin with, it's expected to have .done file).
*/ */
if (endLogSegNo == startLogSegNo) if (endLogSegNo == startLogSegNo)
{ {
XLogFileCopy(startLogSegNo, endTLI, endLogSegNo, char *tmpfname;
endOfLog % XLOG_SEG_SIZE);
XLogFileName(xlogfname, endTLI, endLogSegNo);
/*
* Make a copy of the file on the new timeline.
*
* Writing WAL isn't allowed yet, so there are no locking
* considerations. But we should be just as tense as XLogFileInit to
* avoid emplacing a bogus file.
*/
tmpfname = XLogFileCopy(NULL, xlogfname, endOfLog % XLOG_SEG_SIZE);
if (!InstallXLogFileSegment(&endLogSegNo, tmpfname, false, 0, false))
elog(ERROR, "InstallXLogFileSegment should not have failed");
/* Create .ready file only when neither .ready nor .done files exist */ /*
if (XLogArchivingActive()) * Make a .partial copy for the archive (unless the original file was
* already archived)
*/
if (XLogArchivingActive() && XLogArchiveIsBusy(xlogfname))
{ {
XLogFileName(xlogfname, endTLI, endLogSegNo); char partialfname[MAXFNAMELEN];
XLogArchiveCheckDone(xlogfname);
snprintf(partialfname, MAXFNAMELEN, "%s.partial", xlogfname);
/* Make sure there's no .done or .ready file for it. */
XLogArchiveCleanup(partialfname);
/*
* We copy the whole segment, not just upto the switch point.
* The portion after the switch point might be garbage, but it
* might also be valid WAL, if we stopped recovery at user's
* request before reaching the end. Better to preserve the
* file as it is, garbage and all, than lose the evidence if
* something goes wrong.
*/
(void) XLogFileCopy(partialfname, xlogfname, XLOG_SEG_SIZE);
XLogArchiveNotify(partialfname);
} }
} }
else else
{ {
/*
* The switch happened at a segment boundary, so just create the next
* segment on the new timeline.
*/
bool use_existent = true; bool use_existent = true;
int fd; int fd;
......
...@@ -145,6 +145,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; ...@@ -145,6 +145,11 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader;
#define IsXLogFileName(fname) \ #define IsXLogFileName(fname) \
(strlen(fname) == 24 && strspn(fname, "0123456789ABCDEF") == 24) (strlen(fname) == 24 && strspn(fname, "0123456789ABCDEF") == 24)
/*
* XLOG segment with .partial suffix. Used by pg_receivexlog and at end of
* archive recovery, when we want to archive a WAL segment but it might not
* be complete yet.
*/
#define IsPartialXLogFileName(fname) \ #define IsPartialXLogFileName(fname) \
(strlen(fname) == 24 + strlen(".partial") && \ (strlen(fname) == 24 + strlen(".partial") && \
strspn(fname, "0123456789ABCDEF") == 24 && \ strspn(fname, "0123456789ABCDEF") == 24 && \
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
*/ */
#define MIN_XFN_CHARS 16 #define MIN_XFN_CHARS 16
#define MAX_XFN_CHARS 40 #define MAX_XFN_CHARS 40
#define VALID_XFN_CHARS "0123456789ABCDEF.history.backup" #define VALID_XFN_CHARS "0123456789ABCDEF.history.backup.partial"
/* ---------- /* ----------
* Functions called from postmaster * Functions called from postmaster
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment