Commit 07e8b6aa authored by Tom Lane

Don't allow walsender to send WAL data until it's been safely fsync'd on the
master.  Otherwise a subsequent crash could cause the master to lose WAL that
has already been applied on the slave, resulting in the slave being out of
sync and soon corrupt.  Per recent discussion and an example from Robert Haas.

Fujii Masao
parent 8f4e1218
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.424 2010/06/14 06:04:21 heikki Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.425 2010/06/17 16:41:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -6803,17 +6803,18 @@ GetInsertRecPtr(void) ...@@ -6803,17 +6803,18 @@ GetInsertRecPtr(void)
} }
/* /*
* GetWriteRecPtr -- Returns the current write position. * GetFlushRecPtr -- Returns the current flush position, ie, the last WAL
* position known to be fsync'd to disk.
*/ */
XLogRecPtr XLogRecPtr
GetWriteRecPtr(void) GetFlushRecPtr(void)
{ {
/* use volatile pointer to prevent code rearrangement */ /* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl; volatile XLogCtlData *xlogctl = XLogCtl;
XLogRecPtr recptr; XLogRecPtr recptr;
SpinLockAcquire(&xlogctl->info_lck); SpinLockAcquire(&xlogctl->info_lck);
recptr = xlogctl->LogwrtResult.Write; recptr = xlogctl->LogwrtResult.Flush;
SpinLockRelease(&xlogctl->info_lck); SpinLockRelease(&xlogctl->info_lck);
return recptr; return recptr;
......
...@@ -3,8 +3,9 @@ ...@@ -3,8 +3,9 @@
* walsender.c * walsender.c
* *
* The WAL sender process (walsender) is new as of Postgres 9.0. It takes * The WAL sender process (walsender) is new as of Postgres 9.0. It takes
* charge of XLOG streaming sender in the primary server. At first, it is * care of sending XLOG from the primary server to a single recipient.
* started by the postmaster when the walreceiver in the standby server * (Note that there can be more than one walsender process concurrently.)
* It is started by the postmaster when the walreceiver of a standby server
* connects to the primary server and requests XLOG streaming replication. * connects to the primary server and requests XLOG streaming replication.
* It attempts to keep reading XLOG records from the disk and sending them * It attempts to keep reading XLOG records from the disk and sending them
* to the standby server, as long as the connection is alive (i.e., like * to the standby server, as long as the connection is alive (i.e., like
...@@ -23,13 +24,11 @@ ...@@ -23,13 +24,11 @@
* This instructs walsender to send any outstanding WAL, including the * This instructs walsender to send any outstanding WAL, including the
* shutdown checkpoint record, and then exit. * shutdown checkpoint record, and then exit.
* *
* Note that there can be more than one walsender process concurrently.
* *
* Portions Copyright (c) 2010-2010, PostgreSQL Global Development Group * Portions Copyright (c) 2010-2010, PostgreSQL Global Development Group
* *
*
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.26 2010/06/03 23:00:14 tgl Exp $ * $PostgreSQL: pgsql/src/backend/replication/walsender.c,v 1.27 2010/06/17 16:41:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -641,7 +640,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes) ...@@ -641,7 +640,7 @@ XLogRead(char *buf, XLogRecPtr recptr, Size nbytes)
} }
/* /*
* Read up to MAX_SEND_SIZE bytes of WAL that's been written to disk, * Read up to MAX_SEND_SIZE bytes of WAL that's been flushed to disk,
* but not yet sent to the client, and send it. * but not yet sent to the client, and send it.
* *
* msgbuf is a work area in which the output message is constructed. It's * msgbuf is a work area in which the output message is constructed. It's
...@@ -663,11 +662,14 @@ XLogSend(char *msgbuf, bool *caughtup) ...@@ -663,11 +662,14 @@ XLogSend(char *msgbuf, bool *caughtup)
WalDataMessageHeader msghdr; WalDataMessageHeader msghdr;
/* /*
* Attempt to send all data that's already been written out from WAL * Attempt to send all data that's already been written out and fsync'd
* buffers (note it might not yet be fsync'd to disk). We cannot go * to disk. We cannot go further than what's been written out given the
* further than that given the current implementation of XLogRead(). * current implementation of XLogRead(). And in any case it's unsafe to
* send WAL that is not securely down to disk on the master: if the master
* subsequently crashes and restarts, slaves must not have applied any WAL
* that gets lost on the master.
*/ */
SendRqstPtr = GetWriteRecPtr(); SendRqstPtr = GetFlushRecPtr();
/* Quick exit if nothing to do */ /* Quick exit if nothing to do */
if (XLByteLE(SendRqstPtr, sentPtr)) if (XLByteLE(SendRqstPtr, sentPtr))
...@@ -679,7 +681,7 @@ XLogSend(char *msgbuf, bool *caughtup) ...@@ -679,7 +681,7 @@ XLogSend(char *msgbuf, bool *caughtup)
/* /*
* Figure out how much to send in one message. If there's no more than * Figure out how much to send in one message. If there's no more than
* MAX_SEND_SIZE bytes to send, send everything. Otherwise send * MAX_SEND_SIZE bytes to send, send everything. Otherwise send
* MAX_SEND_SIZE bytes, but round to logfile or page boundary. * MAX_SEND_SIZE bytes, but round back to logfile or page boundary.
* *
* The rounding is not only for performance reasons. Walreceiver * The rounding is not only for performance reasons. Walreceiver
* relies on the fact that we never split a WAL record across two * relies on the fact that we never split a WAL record across two
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.112 2010/06/10 07:49:23 heikki Exp $ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.113 2010/06/17 16:41:25 tgl Exp $
*/ */
#ifndef XLOG_H #ifndef XLOG_H
#define XLOG_H #define XLOG_H
...@@ -294,7 +294,7 @@ extern bool CreateRestartPoint(int flags); ...@@ -294,7 +294,7 @@ extern bool CreateRestartPoint(int flags);
extern void XLogPutNextOid(Oid nextOid); extern void XLogPutNextOid(Oid nextOid);
extern XLogRecPtr GetRedoRecPtr(void); extern XLogRecPtr GetRedoRecPtr(void);
extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetInsertRecPtr(void);
extern XLogRecPtr GetWriteRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void);
extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
extern TimeLineID GetRecoveryTargetTLI(void); extern TimeLineID GetRecoveryTargetTLI(void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment