Commit 73c986ad authored by Alvaro Herrera's avatar Alvaro Herrera

Keep track of transaction commit timestamps

Transactions can now set their commit timestamp directly as they commit,
or an external transaction commit timestamp can be fed from an outside
system using the new function TransactionTreeSetCommitTsData().  This
data is crash-safe, and truncated at Xid freeze point, same as pg_clog.

This module is disabled by default because it causes a performance hit,
but can be enabled in postgresql.conf requiring only a server restart.

A new test in src/test/modules is included.

Catalog version bumped due to the new subdirectory within PGDATA and a
couple of new SQL functions.

Authors: Álvaro Herrera and Petr Jelínek

Reviewed to varying degrees by Michael Paquier, Andres Freund, Robert
Haas, Amit Kapila, Fujii Masao, Jaime Casanova, Simon Riggs, Steven
Singer, Peter Eisentraut
parent 6597ec9b
......@@ -430,6 +430,13 @@ copy_clog_xlog_xid(void)
"\"%s/pg_resetxlog\" -f -e %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
new_cluster.pgdata);
/* must reset commit timestamp limits also */
exec_prog(UTILITY_LOG_FILE, NULL, true,
"\"%s/pg_resetxlog\" -f -c %u,%u \"%s\"",
new_cluster.bindir,
old_cluster.controldata.chkpnt_nxtxid,
old_cluster.controldata.chkpnt_nxtxid,
new_cluster.pgdata);
check_ok();
/*
......
......@@ -10,6 +10,7 @@
#include "access/brin_xlog.h"
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/gin.h"
#include "access/gist_private.h"
#include "access/hash.h"
......
......@@ -2673,6 +2673,20 @@ include_dir 'conf.d'
</listitem>
</varlistentry>
<varlistentry id="guc-track-commit-timestamp" xreflabel="track_commit_timestamp">
<term><varname>track_commit_timestamp</varname> (<type>bool</type>)</term>
<indexterm>
<primary><varname>track_commit_timestamp</> configuration parameter</primary>
</indexterm>
<listitem>
<para>
Record commit time of transactions. This parameter
can only be set in the <filename>postgresql.conf</> file or on the server
command line, and changing it requires a server restart. The default
value is <literal>off</literal>.
</para>
</listitem>
</varlistentry>
</variablelist>
</sect2>
......
......@@ -15938,6 +15938,45 @@ SELECT collation for ('foo' COLLATE "de_DE");
For example <literal>10:20:10,14,15</literal> means
<literal>xmin=10, xmax=20, xip_list=10, 14, 15</literal>.
</para>
<para>
The functions shown in <xref linkend="functions-commit-timestamp">
provide information about transactions that have already been committed.
These functions mainly provide information about when the transactions
were committed. They only provide useful data when the
<xref linkend="guc-track-commit-timestamp"> configuration option is enabled
and only for transactions that were committed after it was enabled.
</para>
<table id="functions-commit-timestamp">
<title>Committed transaction information</title>
<tgroup cols="3">
<thead>
<row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
</thead>
<tbody>
<row>
<entry>
<indexterm><primary>pg_xact_commit_timestamp</primary></indexterm>
<literal><function>pg_xact_commit_timestamp(<parameter>xid</parameter>)</function></literal>
</entry>
<entry><type>timestamp with time zone</type></entry>
<entry>get commit timestamp of a transaction</entry>
</row>
<row>
<entry>
<indexterm><primary>pg_last_committed_xact</primary></indexterm>
<literal><function>pg_last_committed_xact()</function></literal>
</entry>
<entry><parameter>xid</> <type>xid</>, <parameter>timestamp</> <type>timestamp with time zone</></entry>
<entry>get transaction ID and commit timestamp of latest committed transaction</entry>
</row>
</tbody>
</tgroup>
</table>
</sect1>
<sect1 id="functions-admin">
......
......@@ -22,6 +22,7 @@ PostgreSQL documentation
<refsynopsisdiv>
<cmdsynopsis>
<command>pg_resetxlog</command>
<arg choice="opt"><option>-c</option> <replaceable class="parameter">xid</replaceable>,<replaceable class="parameter">xid</replaceable></arg>
<arg choice="opt"><option>-f</option></arg>
<arg choice="opt"><option>-n</option></arg>
<arg choice="opt"><option>-o</option> <replaceable class="parameter">oid</replaceable></arg>
......@@ -79,9 +80,12 @@ PostgreSQL documentation
<para>
The <option>-o</>, <option>-x</>, <option>-e</>,
<option>-m</>, <option>-O</>,
<option>-c</>
and <option>-l</>
options allow the next OID, next transaction ID, next transaction ID's
epoch, next and oldest multitransaction ID, next multitransaction offset, and WAL
epoch, next and oldest multitransaction ID, next multitransaction offset,
oldest and newest transaction IDs for which the commit time can be retrieved,
and WAL
starting address values to be set manually. These are only needed when
<command>pg_resetxlog</command> is unable to determine appropriate values
by reading <filename>pg_control</>. Safe values can be determined as
......@@ -128,6 +132,19 @@ PostgreSQL documentation
</para>
</listitem>
<listitem>
<para>
A safe value for the oldest transaction ID for which the commit time can
be retrieved (first part of <option>-c</>) can be determined by looking
for the numerically smallest file name in the directory
<filename>pg_commit_ts</> under the data directory. Conversely, a safe
value for the newest transaction ID for which the commit time can be
retrieved (second part of <option>-c</>) can be determined by looking for
the numerically greatest file name in the same directory. As above, the
file names are in hexadecimal.
</para>
</listitem>
<listitem>
<para>
The WAL starting address (<option>-l</>) should be
......
......@@ -66,6 +66,11 @@ Item
<structname>pg_database</></entry>
</row>
<row>
<entry><filename>pg_commit_ts</></entry>
<entry>Subdirectory containing transaction commit timestamp data</entry>
</row>
<row>
<entry><filename>pg_clog</></entry>
<entry>Subdirectory containing transaction commit status data</entry>
......
......@@ -8,7 +8,7 @@ subdir = src/backend/access/rmgrdesc
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = brindesc.o clogdesc.o dbasedesc.o gindesc.o gistdesc.o \
OBJS = brindesc.o clogdesc.o committsdesc.o dbasedesc.o gindesc.o gistdesc.o \
hashdesc.o heapdesc.o \
mxactdesc.o nbtdesc.o relmapdesc.o seqdesc.o smgrdesc.o spgdesc.o \
standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o
......
/*-------------------------------------------------------------------------
*
* committsdesc.c
* rmgr descriptor routines for access/transam/commit_ts.c
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* src/backend/access/rmgrdesc/committsdesc.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/commit_ts.h"
#include "utils/timestamp.h"
/*
 * commit_ts_desc
 *		Append a textual description of a commit_ts WAL record to "buf".
 *
 * buf:    string buffer that receives the description.
 * record: decoded WAL record; its info byte (with the XLR_INFO_MASK bits
 *         cleared) selects the record type and its main data is the payload.
 *
 * ZEROPAGE and TRUNCATE records are rendered as the page number stored at the
 * start of the payload.  SETTS records are rendered as
 * "set <timestamp>/<nodeid> for: <mainxid>" followed by ", <subxid>" for each
 * TransactionId that trails the fixed-size xl_commit_ts_set header.  Any
 * other info value leaves "buf" untouched.
 */
void
commit_ts_desc(StringInfo buf, XLogReaderState *record)
{
	char	   *payload = XLogRecGetData(record);
	uint8		opcode = XLogRecGetInfo(record) & ~XLR_INFO_MASK;

	switch (opcode)
	{
		case COMMIT_TS_ZEROPAGE:
		case COMMIT_TS_TRUNCATE:
			{
				/* Both record types carry a single int: the page number. */
				int			pageno;

				memcpy(&pageno, payload, sizeof(int));
				appendStringInfo(buf, "%d", pageno);
				break;
			}

		case COMMIT_TS_SETTS:
			{
				xl_commit_ts_set *setrec = (xl_commit_ts_set *) payload;
				char	   *cursor = payload + SizeOfCommitTsSet;
				int			remaining;
				int			idx;

				appendStringInfo(buf, "set %s/%d for: %u",
								 timestamptz_to_str(setrec->timestamp),
								 setrec->nodeid,
								 setrec->mainxid);

				/* Whatever follows the fixed header is an array of subxids. */
				remaining = ((XLogRecGetDataLen(record) - SizeOfCommitTsSet) /
							 sizeof(TransactionId));

				/*
				 * Each subxid is copied out with memcpy rather than being
				 * dereferenced in place.
				 */
				for (idx = 0; idx < remaining; idx++)
				{
					TransactionId subxid;

					memcpy(&subxid, cursor, sizeof(TransactionId));
					appendStringInfo(buf, ", %u", subxid);
					cursor += sizeof(TransactionId);
				}
				break;
			}

		default:
			/* Unknown record type: describe nothing. */
			break;
	}
}
/*
 * commit_ts_identify
 *		Map a commit_ts WAL record info code to its symbolic name.
 *
 * Returns "ZEROPAGE", "TRUNCATE" or "SETTS" for the matching COMMIT_TS_*
 * code, and NULL for any other value.  "info" is compared exactly as passed
 * in; no bits are masked off here.
 */
const char *
commit_ts_identify(uint8 info)
{
	if (info == COMMIT_TS_ZEROPAGE)
		return "ZEROPAGE";

	if (info == COMMIT_TS_TRUNCATE)
		return "TRUNCATE";

	if (info == COMMIT_TS_SETTS)
		return "SETTS";

	/* Not a record type this resource manager knows about. */
	return NULL;
}
......@@ -45,6 +45,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
appendStringInfo(buf, "redo %X/%X; "
"tli %u; prev tli %u; fpw %s; xid %u/%u; oid %u; multi %u; offset %u; "
"oldest xid %u in DB %u; oldest multi %u in DB %u; "
"oldest/newest commit timestamp xid: %u/%u; "
"oldest running xid %u; %s",
(uint32) (checkpoint->redo >> 32), (uint32) checkpoint->redo,
checkpoint->ThisTimeLineID,
......@@ -58,6 +59,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
checkpoint->oldestXidDB,
checkpoint->oldestMulti,
checkpoint->oldestMultiDB,
checkpoint->oldestCommitTs,
checkpoint->newestCommitTs,
checkpoint->oldestActiveXid,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
}
......
......@@ -12,8 +12,9 @@ subdir = src/backend/access/transam
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = clog.o transam.o varsup.o xact.o rmgr.o slru.o subtrans.o multixact.o \
timeline.o twophase.o twophase_rmgr.o xlog.o xlogarchive.o xlogfuncs.o \
OBJS = clog.o commit_ts.o multixact.o rmgr.o slru.o subtrans.o \
timeline.o transam.o twophase.o twophase_rmgr.o varsup.o \
xact.o xlog.o xlogarchive.o xlogfuncs.o \
xloginsert.o xlogreader.o xlogutils.o
include $(top_srcdir)/src/backend/common.mk
......
......@@ -840,7 +840,7 @@ parent transaction to complete.
Not all transactional behaviour is emulated, for example we do not insert
a transaction entry into the lock table, nor do we maintain the transaction
stack in memory. Clog and multixact entries are made normally.
stack in memory. Clog, multixact and commit_ts entries are made normally.
Subtrans is maintained during recovery but the details of the transaction
tree are ignored and all subtransactions reference the top-level TransactionId
directly. Since commit is atomic this provides correct lock wait behaviour
......
This diff is collapsed.
......@@ -8,6 +8,7 @@
#include "postgres.h"
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/gin.h"
#include "access/gist_private.h"
#include "access/hash.h"
......
......@@ -1297,7 +1297,7 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data)
len = strlen(clde->d_name);
if ((len == 4 || len == 5) &&
if ((len == 4 || len == 5 || len == 6) &&
strspn(clde->d_name, "0123456789ABCDEF") == len)
{
segno = (int) strtol(clde->d_name, NULL, 16);
......
......@@ -14,6 +14,7 @@
#include "postgres.h"
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/subtrans.h"
#include "access/transam.h"
#include "access/xact.h"
......@@ -158,9 +159,10 @@ GetNewTransactionId(bool isSubXact)
* XID before we zero the page. Fortunately, a page of the commit log
* holds 32K or more transactions, so we don't have to do this very often.
*
* Extend pg_subtrans too.
* Extend pg_subtrans and pg_commit_ts too.
*/
ExtendCLOG(xid);
ExtendCommitTs(xid);
ExtendSUBTRANS(xid);
/*
......
......@@ -20,6 +20,7 @@
#include <time.h>
#include <unistd.h>
#include "access/commit_ts.h"
#include "access/multixact.h"
#include "access/subtrans.h"
#include "access/transam.h"
......@@ -1134,6 +1135,21 @@ RecordTransactionCommit(void)
}
}
/*
* We only need to log the commit timestamp separately if the node
* identifier is a valid value; the commit record above already contains
* the timestamp info otherwise, and will be used to load it.
*/
if (markXidCommitted)
{
CommitTsNodeId node_id;
node_id = CommitTsGetDefaultNodeId();
TransactionTreeSetCommitTsData(xid, nchildren, children,
xactStopTimestamp,
node_id, node_id != InvalidCommitTsNodeId);
}
/*
* Check if we want to commit asynchronously. We can allow the XLOG flush
* to happen asynchronously if synchronous_commit=off, or if the current
......@@ -4644,6 +4660,7 @@ xactGetCommittedChildren(TransactionId **ptr)
*/
static void
xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
TimestampTz commit_time,
TransactionId *sub_xids, int nsubxacts,
SharedInvalidationMessage *inval_msgs, int nmsgs,
RelFileNode *xnodes, int nrels,
......@@ -4671,6 +4688,10 @@ xact_redo_commit_internal(TransactionId xid, XLogRecPtr lsn,
LWLockRelease(XidGenLock);
}
/* Set the transaction commit timestamp and metadata */
TransactionTreeSetCommitTsData(xid, nsubxacts, sub_xids,
commit_time, InvalidCommitTsNodeId, false);
if (standbyState == STANDBY_DISABLED)
{
/*
......@@ -4790,7 +4811,8 @@ xact_redo_commit(xl_xact_commit *xlrec,
/* invalidation messages array follows subxids */
inval_msgs = (SharedInvalidationMessage *) &(subxacts[xlrec->nsubxacts]);
xact_redo_commit_internal(xid, lsn, subxacts, xlrec->nsubxacts,
xact_redo_commit_internal(xid, lsn, xlrec->xact_time,
subxacts, xlrec->nsubxacts,
inval_msgs, xlrec->nmsgs,
xlrec->xnodes, xlrec->nrels,
xlrec->dbId,
......@@ -4805,7 +4827,8 @@ static void
xact_redo_commit_compact(xl_xact_commit_compact *xlrec,
TransactionId xid, XLogRecPtr lsn)
{
xact_redo_commit_internal(xid, lsn, xlrec->subxacts, xlrec->nsubxacts,
xact_redo_commit_internal(xid, lsn, xlrec->xact_time,
xlrec->subxacts, xlrec->nsubxacts,
NULL, 0, /* inval msgs */
NULL, 0, /* relfilenodes */
InvalidOid, /* dbId */
......
......@@ -22,6 +22,7 @@
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
......@@ -4518,6 +4519,8 @@ BootStrapXLOG(void)
checkPoint.oldestXidDB = TemplateDbOid;
checkPoint.oldestMulti = FirstMultiXactId;
checkPoint.oldestMultiDB = TemplateDbOid;
checkPoint.oldestCommitTs = InvalidTransactionId;
checkPoint.newestCommitTs = InvalidTransactionId;
checkPoint.time = (pg_time_t) time(NULL);
checkPoint.oldestActiveXid = InvalidTransactionId;
......@@ -4527,6 +4530,7 @@ BootStrapXLOG(void)
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId);
/* Set up the XLOG page header */
page->xlp_magic = XLOG_PAGE_MAGIC;
......@@ -4606,6 +4610,7 @@ BootStrapXLOG(void)
ControlFile->max_locks_per_xact = max_locks_per_xact;
ControlFile->wal_level = wal_level;
ControlFile->wal_log_hints = wal_log_hints;
ControlFile->track_commit_timestamp = track_commit_timestamp;
ControlFile->data_checksum_version = bootstrap_data_checksum_version;
/* some additional ControlFile fields are set in WriteControlFile() */
......@@ -4614,6 +4619,7 @@ BootStrapXLOG(void)
/* Bootstrap the commit log, too */
BootStrapCLOG();
BootStrapCommitTs();
BootStrapSUBTRANS();
BootStrapMultiXact();
......@@ -5920,6 +5926,10 @@ StartupXLOG(void)
ereport(DEBUG1,
(errmsg("oldest MultiXactId: %u, in database %u",
checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
ereport(DEBUG1,
(errmsg("commit timestamp Xid oldest/newest: %u/%u",
checkPoint.oldestCommitTs,
checkPoint.newestCommitTs)));
if (!TransactionIdIsNormal(checkPoint.nextXid))
ereport(PANIC,
(errmsg("invalid next transaction ID")));
......@@ -5931,6 +5941,8 @@ StartupXLOG(void)
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
SetCommitTsLimit(checkPoint.oldestCommitTs,
checkPoint.newestCommitTs);
MultiXactSetSafeTruncate(checkPoint.oldestMulti);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
......@@ -6153,11 +6165,12 @@ StartupXLOG(void)
ProcArrayInitRecovery(ShmemVariableCache->nextXid);
/*
* Startup commit log and subtrans only. MultiXact has already
* been started up and other SLRUs are not maintained during
* recovery and need not be started yet.
* Startup commit log, commit timestamp and subtrans only.
* MultiXact has already been started up and other SLRUs are not
* maintained during recovery and need not be started yet.
*/
StartupCLOG();
StartupCommitTs();
StartupSUBTRANS(oldestActiveXID);
/*
......@@ -6827,12 +6840,13 @@ StartupXLOG(void)
LWLockRelease(ProcArrayLock);
/*
* Start up the commit log and subtrans, if not already done for hot
* standby.
* Start up the commit log, commit timestamp and subtrans, if not already
* done for hot standby.
*/
if (standbyState == STANDBY_DISABLED)
{
StartupCLOG();
StartupCommitTs();
StartupSUBTRANS(oldestActiveXID);
}
......@@ -6867,6 +6881,12 @@ StartupXLOG(void)
LocalSetXLogInsertAllowed();
XLogReportParameters();
/*
* Local WAL inserts enabled, so it's time to finish initialization
* of commit timestamp.
*/
CompleteCommitTsInitialization();
/*
* All done. Allow backends to write WAL. (Although the bool flag is
* probably atomic in itself, we use the info_lck here to ensure that
......@@ -7433,6 +7453,7 @@ ShutdownXLOG(int code, Datum arg)
CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
}
ShutdownCLOG();
ShutdownCommitTs();
ShutdownSUBTRANS();
ShutdownMultiXact();
......@@ -7769,6 +7790,11 @@ CreateCheckPoint(int flags)
checkPoint.oldestXidDB = ShmemVariableCache->oldestXidDB;
LWLockRelease(XidGenLock);
LWLockAcquire(CommitTsLock, LW_SHARED);
checkPoint.oldestCommitTs = ShmemVariableCache->oldestCommitTs;
checkPoint.newestCommitTs = ShmemVariableCache->newestCommitTs;
LWLockRelease(CommitTsLock);
/* Increase XID epoch if we've wrapped around since last checkpoint */
checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
......@@ -8046,6 +8072,7 @@ static void
CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
{
CheckPointCLOG();
CheckPointCommitTs();
CheckPointSUBTRANS();
CheckPointMultiXact();
CheckPointPredicate();
......@@ -8474,7 +8501,8 @@ XLogReportParameters(void)
MaxConnections != ControlFile->MaxConnections ||
max_worker_processes != ControlFile->max_worker_processes ||
max_prepared_xacts != ControlFile->max_prepared_xacts ||
max_locks_per_xact != ControlFile->max_locks_per_xact)
max_locks_per_xact != ControlFile->max_locks_per_xact ||
track_commit_timestamp != ControlFile->track_commit_timestamp)
{
/*
* The change in number of backend slots doesn't need to be WAL-logged
......@@ -8494,6 +8522,7 @@ XLogReportParameters(void)
xlrec.max_locks_per_xact = max_locks_per_xact;
xlrec.wal_level = wal_level;
xlrec.wal_log_hints = wal_log_hints;
xlrec.track_commit_timestamp = track_commit_timestamp;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
......@@ -8508,6 +8537,7 @@ XLogReportParameters(void)
ControlFile->max_locks_per_xact = max_locks_per_xact;
ControlFile->wal_level = wal_level;
ControlFile->wal_log_hints = wal_log_hints;
ControlFile->track_commit_timestamp = track_commit_timestamp;
UpdateControlFile();
}
}
......@@ -8884,6 +8914,7 @@ xlog_redo(XLogReaderState *record)
ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact;
ControlFile->wal_level = xlrec.wal_level;
ControlFile->wal_log_hints = wal_log_hints;
ControlFile->track_commit_timestamp = track_commit_timestamp;
/*
* Update minRecoveryPoint to ensure that if recovery is aborted, we
......
......@@ -23,6 +23,7 @@
#include <math.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
......@@ -1071,10 +1072,12 @@ vac_truncate_clog(TransactionId frozenXID,
return;
/*
* Truncate CLOG to the oldest computed value. Note we don't truncate
* multixacts; that will be done by the next checkpoint.
* Truncate CLOG and CommitTs to the oldest computed value.
* Note we don't truncate multixacts; that will be done by the next
* checkpoint.
*/
TruncateCLOG(frozenXID);
TruncateCommitTs(frozenXID);
/*
* Update the wrap limit for GetNewTransactionId and creation of new
......@@ -1084,6 +1087,7 @@ vac_truncate_clog(TransactionId frozenXID,
*/
SetTransactionIdLimit(frozenXID, oldestxid_datoid);
SetMultiXactIdLimit(minMulti, minmulti_datoid);
AdvanceOldestCommitTs(frozenXID);
}
......
......@@ -1438,7 +1438,7 @@ parse_hba_auth_opt(char *name, char *val, HbaLine *hbaline, int line_num)
ereport(LOG,
(errcode(ERRCODE_CONFIG_FILE_ERROR),
errmsg("client certificates can only be checked if a root certificate store is available"),
errhint("Make sure the configuration parameter \"ssl_ca_file\" is set."),
errhint("Make sure the configuration parameter \"%s\" is set.", "ssl_ca_file"),
errcontext("line %d of configuration file \"%s\"",
line_num, HbaFileName)));
return false;
......
......@@ -133,6 +133,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *recor
case RM_SEQ_ID:
case RM_SPGIST_ID:
case RM_BRIN_ID:
case RM_COMMIT_TS_ID:
break;
case RM_NEXT_ID:
elog(ERROR, "unexpected RM_NEXT_ID rmgr_id: %u", (RmgrIds) XLogRecGetRmid(buf.record));
......
......@@ -15,6 +15,7 @@
#include "postgres.h"
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/nbtree.h"
......@@ -117,6 +118,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
size = add_size(size, ProcGlobalShmemSize());
size = add_size(size, XLOGShmemSize());
size = add_size(size, CLOGShmemSize());
size = add_size(size, CommitTsShmemSize());
size = add_size(size, SUBTRANSShmemSize());
size = add_size(size, TwoPhaseShmemSize());
size = add_size(size, BackgroundWorkerShmemSize());
......@@ -198,6 +200,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
*/
XLOGShmemInit();
CLOGShmemInit();
CommitTsShmemInit();
SUBTRANSShmemInit();
MultiXactShmemInit();
InitBufferPool();
......
......@@ -29,6 +29,7 @@
#include "postgres.h"
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/multixact.h"
#include "access/subtrans.h"
#include "commands/async.h"
......@@ -259,6 +260,9 @@ NumLWLocks(void)
/* clog.c needs one per CLOG buffer */
numLocks += CLOGShmemBuffers();
/* commit_ts.c needs one per CommitTs buffer */
numLocks += CommitTsShmemBuffers();
/* subtrans.c needs one per SubTrans buffer */
numLocks += NUM_SUBTRANS_BUFFERS;
......
......@@ -26,6 +26,7 @@
#include <syslog.h>
#endif
#include "access/commit_ts.h"
#include "access/gin.h"
#include "access/transam.h"
#include "access/twophase.h"
......@@ -835,6 +836,15 @@ static struct config_bool ConfigureNamesBool[] =
false,
check_bonjour, NULL, NULL
},
{
{"track_commit_timestamp", PGC_POSTMASTER, REPLICATION,
gettext_noop("Collects transaction commit time."),
NULL
},
&track_commit_timestamp,
false,
NULL, NULL, NULL
},
{
{"ssl", PGC_POSTMASTER, CONN_AUTH_SECURITY,
gettext_noop("Enables SSL connections."),
......
......@@ -228,6 +228,8 @@
#max_replication_slots = 0 # max number of replication slots
# (change requires restart)
#track_commit_timestamp = off # collect timestamp of transaction commit
# (change requires restart)
# - Master Server -
......
......@@ -186,6 +186,7 @@ static const char *subdirs[] = {
"pg_xlog",
"pg_xlog/archive_status",
"pg_clog",
"pg_commit_ts",
"pg_dynshmem",
"pg_notify",
"pg_serial",
......
......@@ -270,6 +270,8 @@ main(int argc, char *argv[])
ControlFile.checkPointCopy.oldestMulti);
printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
ControlFile.checkPointCopy.oldestMultiDB);
printf(_("Latest checkpoint's oldestCommitTs: %u\n"),
ControlFile.checkPointCopy.oldestCommitTs);
printf(_("Time of latest checkpoint: %s\n"),
ckpttime_str);
printf(_("Fake LSN counter for unlogged rels: %X/%X\n"),
......@@ -300,6 +302,8 @@ main(int argc, char *argv[])
ControlFile.max_prepared_xacts);
printf(_("Current max_locks_per_xact setting: %d\n"),
ControlFile.max_locks_per_xact);
printf(_("Current track_commit_timestamp setting: %s\n"),
ControlFile.track_commit_timestamp ? _("on") : _("off"));
printf(_("Maximum data alignment: %u\n"),
ControlFile.maxAlign);
/* we don't print floatFormat since can't say much useful about it */
......
......@@ -63,6 +63,8 @@ static bool guessed = false; /* T if we had to guess at any values */
static const char *progname;
static uint32 set_xid_epoch = (uint32) -1;
static TransactionId set_xid = 0;
static TransactionId set_oldest_commit_ts = 0;
static TransactionId set_newest_commit_ts = 0;
static Oid set_oid = 0;
static MultiXactId set_mxid = 0;
static MultiXactOffset set_mxoff = (MultiXactOffset) -1;
......@@ -112,7 +114,7 @@ main(int argc, char *argv[])
}
while ((c = getopt(argc, argv, "D:fl:m:no:O:x:e:")) != -1)
while ((c = getopt(argc, argv, "c:D:e:fl:m:no:O:x:")) != -1)
{
switch (c)
{
......@@ -132,7 +134,9 @@ main(int argc, char *argv[])
set_xid_epoch = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -e\n"), progname);
/*------
translator: the second %s is a command line argument (-e, etc) */
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-e");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -147,7 +151,7 @@ main(int argc, char *argv[])
set_xid = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -x\n"), progname);
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-x");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -158,11 +162,42 @@ main(int argc, char *argv[])
}
break;
case 'c':
set_oldest_commit_ts = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',')
{
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
set_newest_commit_ts = strtoul(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != '\0')
{
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
if (set_oldest_commit_ts < 2 &&
set_oldest_commit_ts != 0)
{
fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
exit(1);
}
if (set_newest_commit_ts < 2 &&
set_newest_commit_ts != 0)
{
fprintf(stderr, _("%s: transaction ID (-c) must be either 0 or greater than or equal to 2\n"), progname);
exit(1);
}
break;
case 'o':
set_oid = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -o\n"), progname);
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-o");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -177,7 +212,7 @@ main(int argc, char *argv[])
set_mxid = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',')
{
fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -185,7 +220,7 @@ main(int argc, char *argv[])
set_oldestmxid = strtoul(endptr + 1, &endptr2, 0);
if (endptr2 == endptr + 1 || *endptr2 != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -m\n"), progname);
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -211,7 +246,7 @@ main(int argc, char *argv[])
set_mxoff = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-O");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -225,7 +260,7 @@ main(int argc, char *argv[])
case 'l':
if (strspn(optarg, "01234567890ABCDEFabcdef") != 24)
{
fprintf(stderr, _("%s: invalid argument for option -l\n"), progname);
fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-l");
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
......@@ -345,6 +380,11 @@ main(int argc, char *argv[])
ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
}
if (set_oldest_commit_ts != 0)
ControlFile.checkPointCopy.oldestCommitTs = set_oldest_commit_ts;
if (set_newest_commit_ts != 0)
ControlFile.checkPointCopy.newestCommitTs = set_newest_commit_ts;
if (set_oid != 0)
ControlFile.checkPointCopy.nextOid = set_oid;
......@@ -539,6 +579,7 @@ GuessControlValues(void)
ControlFile.wal_level = WAL_LEVEL_MINIMAL;
ControlFile.wal_log_hints = false;
ControlFile.track_commit_timestamp = false;
ControlFile.MaxConnections = 100;
ControlFile.max_worker_processes = 8;
ControlFile.max_prepared_xacts = 0;
......@@ -621,6 +662,10 @@ PrintControlValues(bool guessed)
ControlFile.checkPointCopy.oldestMulti);
printf(_("Latest checkpoint's oldestMulti's DB: %u\n"),
ControlFile.checkPointCopy.oldestMultiDB);
printf(_("Latest checkpoint's oldest CommitTs: %u\n"),
ControlFile.checkPointCopy.oldestCommitTs);
printf(_("Latest checkpoint's newest CommitTs: %u\n"),
ControlFile.checkPointCopy.newestCommitTs);
printf(_("Maximum data alignment: %u\n"),
ControlFile.maxAlign);
/* we don't print floatFormat since can't say much useful about it */
......@@ -702,6 +747,17 @@ PrintNewControlValues()
printf(_("NextXID epoch: %u\n"),
ControlFile.checkPointCopy.nextXidEpoch);
}
if (set_oldest_commit_ts != 0)
{
printf(_("oldestCommitTs: %u\n"),
ControlFile.checkPointCopy.oldestCommitTs);
}
if (set_newest_commit_ts != 0)
{
printf(_("newestCommitTs: %u\n"),
ControlFile.checkPointCopy.newestCommitTs);
}
}
......@@ -739,6 +795,7 @@ RewriteControlFile(void)
*/
ControlFile.wal_level = WAL_LEVEL_MINIMAL;
ControlFile.wal_log_hints = false;
ControlFile.track_commit_timestamp = false;
ControlFile.MaxConnections = 100;
ControlFile.max_worker_processes = 8;
ControlFile.max_prepared_xacts = 0;
......@@ -1099,6 +1156,8 @@ usage(void)
printf(_("%s resets the PostgreSQL transaction log.\n\n"), progname);
printf(_("Usage:\n %s [OPTION]... {[-D] DATADIR}\n\n"), progname);
printf(_("Options:\n"));
printf(_(" -c XID,XID set oldest and newest transactions bearing commit timestamp\n"));
printf(_(" (zero in either value means no change)\n"));
printf(_(" -e XIDEPOCH set next transaction ID epoch\n"));
printf(_(" -f force update to be done\n"));
printf(_(" -l XLOGFILE force minimum WAL starting location for new transaction log\n"));
......
/*
* commit_ts.h
*
* PostgreSQL commit timestamp manager
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/access/commit_ts.h
*/
#ifndef COMMIT_TS_H
#define COMMIT_TS_H
#include "access/xlog.h"
#include "datatype/timestamp.h"
#include "utils/guc.h"
extern PGDLLIMPORT bool track_commit_timestamp;
extern bool check_track_commit_timestamp(bool *newval, void **extra,
GucSource source);
typedef uint32 CommitTsNodeId;
#define InvalidCommitTsNodeId 0
extern void CommitTsSetDefaultNodeId(CommitTsNodeId nodeid);
extern CommitTsNodeId CommitTsGetDefaultNodeId(void);
extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
TransactionId *subxids, TimestampTz timestamp,
CommitTsNodeId nodeid, bool do_xlog);
extern bool TransactionIdGetCommitTsData(TransactionId xid,
TimestampTz *ts, CommitTsNodeId *nodeid);
extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
CommitTsNodeId *nodeid);
extern Size CommitTsShmemBuffers(void);
extern Size CommitTsShmemSize(void);
extern void CommitTsShmemInit(void);
extern void BootStrapCommitTs(void);
extern void StartupCommitTs(void);
extern void CompleteCommitTsInitialization(void);
extern void ShutdownCommitTs(void);
extern void CheckPointCommitTs(void);
extern void ExtendCommitTs(TransactionId newestXact);
extern void TruncateCommitTs(TransactionId oldestXact);
extern void SetCommitTsLimit(TransactionId oldestXact,
TransactionId newestXact);
extern void AdvanceOldestCommitTs(TransactionId oldestXact);
/* XLOG stuff */
#define COMMIT_TS_ZEROPAGE 0x00
#define COMMIT_TS_TRUNCATE 0x10
#define COMMIT_TS_SETTS 0x20
/*
 * Fixed-size header of a commit timestamp "set" WAL record (presumably the
 * payload logged with COMMIT_TS_SETTS -- confirm against the redo routine).
 * As the trailing comment inside the struct notes, subtransaction XIDs are
 * stored directly after this header.
 */
typedef struct xl_commit_ts_set
{
TimestampTz timestamp;	/* timestamp carried by the record */
CommitTsNodeId nodeid;	/* node id carried by the record */
TransactionId mainxid;	/* presumably the top-level XID; subxact XIDs come after */
/* subxact Xids follow */
} xl_commit_ts_set;
/*
 * Size of the fixed part of xl_commit_ts_set: everything up to and including
 * mainxid.  It is built from offsetof() plus the size of the last field
 * rather than sizeof(xl_commit_ts_set), so any trailing padding the compiler
 * adds after mainxid is not counted.
 */
#define SizeOfCommitTsSet (offsetof(xl_commit_ts_set, mainxid) + \
sizeof(TransactionId))
extern void commit_ts_redo(XLogReaderState *record);
extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
extern const char *commit_ts_identify(uint8 info);
#endif /* COMMIT_TS_H */
......@@ -43,3 +43,4 @@ PG_RMGR(RM_GIST_ID, "Gist", gist_redo, gist_desc, gist_identify, gist_xlog_start
PG_RMGR(RM_SEQ_ID, "Sequence", seq_redo, seq_desc, seq_identify, NULL, NULL)
PG_RMGR(RM_SPGIST_ID, "SPGist", spg_redo, spg_desc, spg_identify, spg_xlog_startup, spg_xlog_cleanup)
PG_RMGR(RM_BRIN_ID, "BRIN", brin_redo, brin_desc, brin_identify, NULL, NULL)
PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_identify, NULL, NULL)
......@@ -123,6 +123,12 @@ typedef struct VariableCacheData
TransactionId xidWrapLimit; /* where the world ends */
Oid oldestXidDB; /* database with minimum datfrozenxid */
/*
* These fields are protected by CommitTsLock
*/
TransactionId oldestCommitTs;
TransactionId newestCommitTs;
/*
* These fields are protected by ProcArrayLock.
*/
......
......@@ -186,6 +186,7 @@ typedef struct xl_parameter_change
int max_locks_per_xact;
int wal_level;
bool wal_log_hints;
bool track_commit_timestamp;
} xl_parameter_change;
/* logs restore point */
......
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201412021
#define CATALOG_VERSION_NO 201412022
#endif
......@@ -46,6 +46,8 @@ typedef struct CheckPoint
MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */
Oid oldestMultiDB; /* database with minimum datminmxid */
pg_time_t time; /* time stamp of checkpoint */
TransactionId oldestCommitTs; /* oldest Xid with valid commit timestamp */
TransactionId newestCommitTs; /* newest Xid with valid commit timestamp */
/*
* Oldest XID still running. This is only needed to initialize hot standby
......@@ -177,6 +179,7 @@ typedef struct ControlFileData
int max_worker_processes;
int max_prepared_xacts;
int max_locks_per_xact;
bool track_commit_timestamp;
/*
* This data is used to check for hardware-architecture compatibility of
......
......@@ -3023,6 +3023,12 @@ DESCR("view two-phase transactions");
DATA(insert OID = 3819 ( pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ pg_get_multixact_members _null_ _null_ _null_ ));
DESCR("view members of a multixactid");
DATA(insert OID = 3581 ( pg_xact_commit_timestamp PGNSP PGUID 12 1 0 0 0 f f f f t f s 1 0 1184 "28" _null_ _null_ _null_ _null_ pg_xact_commit_timestamp _null_ _null_ _null_ ));
DESCR("get commit timestamp of a transaction");
DATA(insert OID = 3583 ( pg_last_committed_xact PGNSP PGUID 12 1 0 0 0 f f f f t f s 0 0 2249 "" "{28,1184}" "{o,o}" "{xid,timestamp}" _null_ pg_last_committed_xact _null_ _null_ _null_ ));
DESCR("get transaction Id and commit timestamp of latest transaction commit");
DATA(insert OID = 3537 ( pg_describe_object PGNSP PGUID 12 1 0 0 0 f f f f t f s 3 0 25 "26 26 23" _null_ _null_ _null_ _null_ pg_describe_object _null_ _null_ _null_ ));
DESCR("get identification of SQL object");
......
......@@ -127,7 +127,10 @@ extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
#define AutoFileLock (&MainLWLockArray[35].lock)
#define ReplicationSlotAllocationLock (&MainLWLockArray[36].lock)
#define ReplicationSlotControlLock (&MainLWLockArray[37].lock)
#define NUM_INDIVIDUAL_LWLOCKS 38
#define CommitTsControlLock (&MainLWLockArray[38].lock)
#define CommitTsLock (&MainLWLockArray[39].lock)
#define NUM_INDIVIDUAL_LWLOCKS 40
/*
* It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
......
......@@ -1187,6 +1187,10 @@ extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
/* access/transam/multixact.c */
extern Datum pg_get_multixact_members(PG_FUNCTION_ARGS);
/* access/transam/commit_ts.c */
extern Datum pg_xact_commit_timestamp(PG_FUNCTION_ARGS);
extern Datum pg_last_committed_xact(PG_FUNCTION_ARGS);
/* catalogs/dependency.c */
extern Datum pg_describe_object(PG_FUNCTION_ARGS);
extern Datum pg_identify_object(PG_FUNCTION_ARGS);
......
......@@ -5,6 +5,7 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = \
commit_ts \
worker_spi \
dummy_seclabel \
test_shm_mq \
......
# Generated subdirectories
/log/
/results/
/tmp_check/
# src/test/modules/commit_ts/Makefile
#
# Build rules for the commit timestamp regression test module.
#
# Name of the regression test case to run (given without a file suffix).
REGRESS = commit_timestamp
# Extra option handed to the regression driver: load commit_ts.conf into the
# temporary test installation's configuration.  Presumably that file is what
# turns track_commit_timestamp on for the test -- confirm against the file.
REGRESS_OPTS = --temp-config=$(top_srcdir)/src/test/modules/commit_ts/commit_ts.conf
# With USE_PGXS defined, pull the build rules from the installation that
# pg_config reports; otherwise build as part of the source tree, with paths
# given relative to this directory.
ifdef USE_PGXS
PG_CONFIG = pg_config
PGXS := $(shell $(PG_CONFIG) --pgxs)
include $(PGXS)
else
subdir = src/test/modules/commit_ts
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
track_commit_timestamp = on
\ No newline at end of file
--
-- Commit Timestamp
--
SHOW track_commit_timestamp;
track_commit_timestamp
------------------------
on
(1 row)
CREATE TABLE committs_test(id serial, ts timestamptz default now());
INSERT INTO committs_test DEFAULT VALUES;
INSERT INTO committs_test DEFAULT VALUES;
INSERT INTO committs_test DEFAULT VALUES;
-- The upper-bound checks below compare against now() with <=: on a coarse
-- clock a commit timestamp can equal the start time of the next statement.
SELECT id,
pg_xact_commit_timestamp(xmin) >= ts,
pg_xact_commit_timestamp(xmin) <= now(),
pg_xact_commit_timestamp(xmin) - ts < '60s' -- 60s should give a lot of reserve
FROM committs_test
ORDER BY id;
id | ?column? | ?column? | ?column?
----+----------+----------+----------
1 | t | t | t
2 | t | t | t
3 | t | t | t
(3 rows)
DROP TABLE committs_test;
SELECT pg_xact_commit_timestamp('0'::xid);
ERROR: cannot retrieve commit timestamp for transaction 0
SELECT pg_xact_commit_timestamp('1'::xid);
ERROR: cannot retrieve commit timestamp for transaction 1
SELECT pg_xact_commit_timestamp('2'::xid);
ERROR: cannot retrieve commit timestamp for transaction 2
SELECT x.xid::text::bigint > 0, x.timestamp > '-infinity'::timestamptz, x.timestamp <= now() FROM pg_last_committed_xact() x;
?column? | ?column? | ?column?
----------+----------+----------
t | t | t
(1 row)
--
-- Commit Timestamp
--
SHOW track_commit_timestamp;
track_commit_timestamp
------------------------
off
(1 row)
CREATE TABLE committs_test(id serial, ts timestamptz default now());
INSERT INTO committs_test DEFAULT VALUES;
INSERT INTO committs_test DEFAULT VALUES;
INSERT INTO committs_test DEFAULT VALUES;
-- The upper-bound checks below compare against now() with <=: on a coarse
-- clock a commit timestamp can equal the start time of the next statement.
SELECT id,
pg_xact_commit_timestamp(xmin) >= ts,
pg_xact_commit_timestamp(xmin) <= now(),
pg_xact_commit_timestamp(xmin) - ts < '60s' -- 60s should give a lot of reserve
FROM committs_test
ORDER BY id;
ERROR: could not get commit timestamp data
HINT: Make sure the configuration parameter "track_commit_timestamp" is set.
DROP TABLE committs_test;
SELECT pg_xact_commit_timestamp('0'::xid);
ERROR: could not get commit timestamp data
HINT: Make sure the configuration parameter "track_commit_timestamp" is set.
SELECT pg_xact_commit_timestamp('1'::xid);
ERROR: could not get commit timestamp data
HINT: Make sure the configuration parameter "track_commit_timestamp" is set.
SELECT pg_xact_commit_timestamp('2'::xid);
ERROR: could not get commit timestamp data
HINT: Make sure the configuration parameter "track_commit_timestamp" is set.
SELECT x.xid::text::bigint > 0, x.timestamp > '-infinity'::timestamptz, x.timestamp <= now() FROM pg_last_committed_xact() x;
ERROR: could not get commit timestamp data
HINT: Make sure the configuration parameter "track_commit_timestamp" is set.
--
-- Commit Timestamp
--
SHOW track_commit_timestamp;
CREATE TABLE committs_test(id serial, ts timestamptz default now());
INSERT INTO committs_test DEFAULT VALUES;
INSERT INTO committs_test DEFAULT VALUES;
INSERT INTO committs_test DEFAULT VALUES;
-- The upper-bound checks below compare against now() with <=: on a coarse
-- clock a commit timestamp can equal the start time of the next statement.
SELECT id,
pg_xact_commit_timestamp(xmin) >= ts,
pg_xact_commit_timestamp(xmin) <= now(),
pg_xact_commit_timestamp(xmin) - ts < '60s' -- 60s should give a lot of reserve
FROM committs_test
ORDER BY id;
DROP TABLE committs_test;
SELECT pg_xact_commit_timestamp('0'::xid);
SELECT pg_xact_commit_timestamp('1'::xid);
SELECT pg_xact_commit_timestamp('2'::xid);
SELECT x.xid::text::bigint > 0, x.timestamp > '-infinity'::timestamptz, x.timestamp <= now() FROM pg_last_committed_xact() x;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment