Commit 6b619551 authored by Alvaro Herrera

Code review for transaction commit timestamps

There are three main changes here:

1. No longer cause a start failure in a standby if the feature is
disabled in postgresql.conf but enabled in the master.  This reverts one
part of commit 4f3924d9; what we keep is the ability of the standby
to activate/deactivate the module (which includes creating and removing
segments as appropriate) during replay of such actions in the master.

2. Replay WAL records affecting commitTS even if the feature is
disabled.  This means the standby will always have the same state as the
master after replay.

3. Have COMMIT PREPARED record the transaction commit time as well.  We
were previously only recording it in the normal transaction commit path.

Author: Petr Jelínek
Discussion: http://www.postgresql.org/message-id/CAHGQGwHereDzzzmfxEBYcVQu3oZv6vZcgu1TPeERWbDc+gQ06g@mail.gmail.com
Discussion: http://www.postgresql.org/message-id/CAHGQGwFuzfO4JscM9LCAmCDCxp_MfLvN4QdB+xWsS-FijbjTYQ@mail.gmail.com

Additionally, I cleaned up nearby code related to replication origins,
which I found a bit hard to follow, and fixed a couple of typos.

Backpatch to 9.5, where this code was introduced.

Per bug reports from Fujii Masao and subsequent discussion.
parent b631a46e
......@@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
"max_prepared_xacts=%d max_locks_per_xact=%d "
"wal_level=%s wal_log_hints=%s "
"track_commit_timestamps=%s",
"track_commit_timestamp=%s",
xlrec.MaxConnections,
xlrec.max_worker_processes,
xlrec.max_prepared_xacts,
......
......@@ -122,29 +122,39 @@ static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
* subtrans implementation changes in the future, we might want to revisit the
* decision of storing timestamp info for each subxid.
*
* The do_xlog parameter tells us whether to include an XLog record of this
* or not. Normal path through RecordTransactionCommit() will be related
* to a transaction commit XLog record, and so should pass "false" here.
* Other callers probably want to pass true, so that the given values persist
* in case of crashes.
* The replaying_xlog parameter indicates whether the module should execute
* its write even if the feature is nominally disabled, because we're replaying
* a record generated from a master where the feature is enabled.
*
* The write_xlog parameter tells us whether to include an XLog record of this
* or not. Normally, this is called from transaction commit routines (both
* normal and prepared) and the information will be stored in the transaction
* commit XLog record, and so they should pass "false" for this. The XLog redo
* code should use "false" here as well. Other callers probably want to pass
* true, so that the given values persist in case of crashes.
*/
void
TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
TransactionId *subxids, TimestampTz timestamp,
RepOriginId nodeid, bool do_xlog)
RepOriginId nodeid,
bool replaying_xlog, bool write_xlog)
{
int i;
TransactionId headxid;
TransactionId newestXact;
if (!track_commit_timestamp)
/* We'd better not try to write xlog during replay */
Assert(!(write_xlog && replaying_xlog));
/* No-op if feature not enabled, unless replaying WAL */
if (!track_commit_timestamp && !replaying_xlog)
return;
/*
* Comply with the WAL-before-data rule: if caller specified it wants this
* value to be recorded in WAL, do so before touching the data.
*/
if (do_xlog)
if (write_xlog)
WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid);
/*
......@@ -906,7 +916,8 @@ commit_ts_redo(XLogReaderState *record)
subxids = NULL;
TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids,
setts->timestamp, setts->nodeid, false);
setts->timestamp, setts->nodeid, false,
true);
if (subxids)
pfree(subxids);
}
......
......@@ -41,6 +41,7 @@
#include <time.h>
#include <unistd.h>
#include "access/commit_ts.h"
#include "access/htup_details.h"
#include "access/subtrans.h"
#include "access/transam.h"
......@@ -56,8 +57,9 @@
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "replication/walsender.h"
#include "replication/origin.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/predicate.h"
......@@ -2070,8 +2072,9 @@ RecoverPreparedTransactions(void)
/*
* RecordTransactionCommitPrepared
*
* This is basically the same as RecordTransactionCommit: in particular,
* we must set the delayChkpt flag to avoid a race condition.
* This is basically the same as RecordTransactionCommit (q.v. if you change
* this function): in particular, we must set the delayChkpt flag to avoid a
* race condition.
*
* We know the transaction made at least one XLOG entry (its PREPARE),
* so it is never possible to optimize out the commit record.
......@@ -2087,6 +2090,15 @@ RecordTransactionCommitPrepared(TransactionId xid,
bool initfileinval)
{
XLogRecPtr recptr;
TimestampTz committs = GetCurrentTimestamp();
bool replorigin;
/*
* Are we using the replication origins feature? Or, in other words, are
* we replaying remote actions?
*/
replorigin = (replorigin_session_origin != InvalidRepOriginId &&
replorigin_session_origin != DoNotReplicateId);
START_CRIT_SECTION();
......@@ -2094,12 +2106,33 @@ RecordTransactionCommitPrepared(TransactionId xid,
MyPgXact->delayChkpt = true;
/* Emit the XLOG commit record */
recptr = XactLogCommitRecord(GetCurrentTimestamp(),
recptr = XactLogCommitRecord(committs,
nchildren, children, nrels, rels,
ninvalmsgs, invalmsgs,
initfileinval, false,
xid);
if (replorigin)
/* Move LSNs forward for this replication origin */
replorigin_session_advance(replorigin_session_origin_lsn,
XactLastRecEnd);
/*
* Record commit timestamp. The value comes from plain commit timestamp
* if replorigin is not enabled, or replorigin already set a value for us
* in replorigin_session_origin_timestamp otherwise.
*
* We don't need to WAL-log anything here, as the commit record written
* above already contains the data.
*/
if (!replorigin || replorigin_session_origin_timestamp == 0)
replorigin_session_origin_timestamp = committs;
TransactionTreeSetCommitTsData(xid, nchildren, children,
replorigin_session_origin_timestamp,
replorigin_session_origin, false, false);
/*
* We don't currently try to sleep before flush here ... nor is there any
* support for async commit of a prepared xact (the very idea is probably
......
......@@ -42,9 +42,9 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "replication/logical.h"
#include "replication/walsender.h"
#include "replication/syncrep.h"
#include "replication/origin.h"
#include "replication/syncrep.h"
#include "replication/walsender.h"
#include "storage/fd.h"
#include "storage/lmgr.h"
#include "storage/predicate.h"
......@@ -1119,6 +1119,8 @@ AtSubStart_ResourceOwner(void)
*
* Returns latest XID among xact and its children, or InvalidTransactionId
* if the xact has no XID. (We compute that here just because it's easier.)
*
* If you change this function, see RecordTransactionCommitPrepared also.
*/
static TransactionId
RecordTransactionCommit(void)
......@@ -1172,6 +1174,15 @@ RecordTransactionCommit(void)
}
else
{
bool replorigin;
/*
* Are we using the replication origins feature? Or, in other words,
* are we replaying remote actions?
*/
replorigin = (replorigin_session_origin != InvalidRepOriginId &&
replorigin_session_origin != DoNotReplicateId);
/*
* Begin commit critical section and insert the commit XLOG record.
*/
......@@ -1206,26 +1217,28 @@ RecordTransactionCommit(void)
RelcacheInitFileInval, forceSyncCommit,
InvalidTransactionId /* plain commit */ );
/*
* Record plain commit ts if not replaying remote actions, or if no
* timestamp is configured.
*/
if (replorigin_session_origin == InvalidRepOriginId ||
replorigin_session_origin == DoNotReplicateId ||
replorigin_session_origin_timestamp == 0)
replorigin_session_origin_timestamp = xactStopTimestamp;
else
if (replorigin)
/* Move LSNs forward for this replication origin */
replorigin_session_advance(replorigin_session_origin_lsn,
XactLastRecEnd);
/*
* We don't need to WAL log origin or timestamp here, the commit
* record contains all the necessary information and will redo the SET
* action during replay.
* Record commit timestamp. The value comes from plain commit
* timestamp if there's no replication origin; otherwise, the
* timestamp was already set in replorigin_session_origin_timestamp by
* replication.
*
* We don't need to WAL-log anything here, as the commit record
* written above already contains the data.
*/
if (!replorigin || replorigin_session_origin_timestamp == 0)
replorigin_session_origin_timestamp = xactStopTimestamp;
TransactionTreeSetCommitTsData(xid, nchildren, children,
replorigin_session_origin_timestamp,
replorigin_session_origin, false);
replorigin_session_origin,
false, false);
}
/*
......@@ -5321,7 +5334,7 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
/* Set the transaction commit timestamp and metadata */
TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts,
commit_time, origin_id,
false);
true, false);
if (standbyState == STANDBY_DISABLED)
{
......
......@@ -5826,19 +5826,6 @@ do { \
minValue))); \
} while(0)
/*
 * RecoveryRequiresBoolParameter
 *
 * Compare a boolean setting on this server (currValue) against the value
 * supplied for the master (masterValue).  If the two differ, report an ERROR
 * with ERRCODE_INVALID_PARAMETER_VALUE whose message names the parameter
 * (param_name) and prints both values as "true"/"false".
 *
 * currValue and masterValue are each copied into a local before use, so an
 * argument expression is evaluated only once.  The do { } while(0) wrapper
 * lets the macro be used as a single statement.
 */
#define RecoveryRequiresBoolParameter(param_name, currValue, masterValue) \
do { \
bool _currValue = (currValue); \
bool _masterValue = (masterValue); \
if (_currValue != _masterValue) \
ereport(ERROR, \
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
errmsg("hot standby is not possible because it requires \"%s\" to be same on master and standby (master has \"%s\", standby has \"%s\")", \
param_name, \
_masterValue ? "true" : "false", \
_currValue ? "true" : "false"))); \
} while(0)
/*
* Check to see if required parameters are set high enough on this server
* for various aspects of recovery operation.
......@@ -5885,9 +5872,6 @@ CheckRequiredParameterValues(void)
RecoveryRequiresIntParameter("max_locks_per_transaction",
max_locks_per_xact,
ControlFile->max_locks_per_xact);
RecoveryRequiresBoolParameter("track_commit_timestamp",
track_commit_timestamp,
ControlFile->track_commit_timestamp);
}
}
......
......@@ -24,7 +24,8 @@ extern bool check_track_commit_timestamp(bool *newval, void **extra,
extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
TransactionId *subxids, TimestampTz timestamp,
RepOriginId nodeid, bool do_xlog);
RepOriginId nodeid,
bool replaying_xlog, bool write_xlog);
extern bool TransactionIdGetCommitTsData(TransactionId xid,
TimestampTz *ts, RepOriginId *nodeid);
extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
......@@ -67,4 +68,4 @@ extern void commit_ts_redo(XLogReaderState *record);
extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
extern const char *commit_ts_identify(uint8 info);
#endif /* COMMITTS_H */
#endif /* COMMIT_TS_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment