Commit 35af5422 authored by Tom Lane's avatar Tom Lane

Make the server track an 'XID epoch', that is, maintain higher-order bits

of the transaction ID counter.  Nothing is done with the epoch except to
store it in checkpoint records, but this provides a foundation with which
add-on code can pretend that XIDs never wrap around.  This is a severely
trimmed and rewritten version of the xxid patch submitted by Marko Kreen.
Per discussion, the epoch counter seems the only part of xxid that really
needs to be in the core server.
parent 1054c380
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.16 2006/06/18 15:38:36 petere Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.17 2006/08/21 16:16:31 tgl Exp $
PostgreSQL documentation
-->
......@@ -22,6 +22,7 @@ PostgreSQL documentation
<arg>-n</arg>
<arg>-o<replaceable class="parameter">oid</replaceable> </arg>
<arg>-x <replaceable class="parameter">xid</replaceable> </arg>
<arg>-e <replaceable class="parameter">xid_epoch</replaceable> </arg>
<arg>-m <replaceable class="parameter">mxid</replaceable> </arg>
<arg>-O <replaceable class="parameter">mxoff</replaceable> </arg>
<arg>-l <replaceable class="parameter">timelineid</replaceable>,<replaceable class="parameter">fileid</replaceable>,<replaceable class="parameter">seg</replaceable> </arg>
......@@ -61,9 +62,9 @@ PostgreSQL documentation
by specifying the <literal>-f</> (force) switch. In this case plausible
values will be substituted for the missing data. Most of the fields can be
expected to match, but manual assistance may be needed for the next OID,
next transaction ID, next multitransaction ID and offset,
next transaction ID and epoch, next multitransaction ID and offset,
WAL starting address, and database locale fields.
The first five of these can be set using the switches discussed below.
The first six of these can be set using the switches discussed below.
<command>pg_resetxlog</command>'s own environment is the source for its
guess at the locale fields; take care that <envar>LANG</> and so forth
match the environment that <command>initdb</> was run in.
......@@ -76,11 +77,12 @@ PostgreSQL documentation
</para>
<para>
The <literal>-o</>, <literal>-x</>, <literal>-m</>, <literal>-O</>,
The <literal>-o</>, <literal>-x</>, <literal>-e</>,
<literal>-m</>, <literal>-O</>,
and <literal>-l</>
switches allow the next OID, next transaction ID, next multitransaction
ID, next multitransaction offset, and WAL starting address values to
be set manually. These are only needed when
switches allow the next OID, next transaction ID, next transaction ID's
epoch, next multitransaction ID, next multitransaction offset, and WAL
starting address values to be set manually. These are only needed when
<command>pg_resetxlog</command> is unable to determine appropriate values
by reading <filename>pg_control</>. Safe values may be determined as
follows:
......@@ -146,6 +148,18 @@ PostgreSQL documentation
get the next-OID setting right.
</para>
</listitem>
<listitem>
<para>
The transaction ID epoch is not actually stored anywhere in the database
except in the field that is set by <command>pg_resetxlog</command>,
so any value will work so far as the database itself is concerned.
You might need to adjust this value to ensure that replication
systems such as <application>Slony-I</> work correctly &mdash;
if so, an appropriate value should be obtainable from the state of
the downstream replicated database.
</para>
</listitem>
</itemizedlist>
</para>
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.248 2006/08/17 23:04:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.249 2006/08/21 16:16:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -312,10 +312,8 @@ static XLogRecPtr RedoRecPtr;
* new log file.
*
* CheckpointLock: must be held to do a checkpoint (ensures only one
* checkpointer at a time; even though the postmaster won't launch
* parallel checkpoint processes, we need this because manual checkpoints
* could be launched simultaneously). XXX now that all checkpoints are
* done by the bgwriter, isn't this lock redundant?
* checkpointer at a time; currently, with all checkpoints done by the
* bgwriter, this is just pro forma).
*
*----------
*/
......@@ -363,9 +361,13 @@ typedef struct XLogCtlData
{
/* Protected by WALInsertLock: */
XLogCtlInsert Insert;
/* Protected by info_lck: */
XLogwrtRqst LogwrtRqst;
XLogwrtResult LogwrtResult;
uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */
TransactionId ckptXid;
/* Protected by WALWriteLock: */
XLogCtlWrite Write;
......@@ -380,7 +382,7 @@ typedef struct XLogCtlData
int XLogCacheBlck; /* highest allocated xlog buffer index */
TimeLineID ThisTimeLineID;
slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */
slock_t info_lck; /* locks shared variables shown above */
} XLogCtlData;
static XLogCtlData *XLogCtl = NULL;
......@@ -4086,6 +4088,7 @@ BootStrapXLOG(void)
checkPoint.redo.xrecoff = SizeOfXLogLongPHD;
checkPoint.undo = checkPoint.redo;
checkPoint.ThisTimeLineID = ThisTimeLineID;
checkPoint.nextXidEpoch = 0;
checkPoint.nextXid = FirstNormalTransactionId;
checkPoint.nextOid = FirstBootstrapObjectId;
checkPoint.nextMulti = FirstMultiXactId;
......@@ -4752,8 +4755,9 @@ StartupXLOG(void)
checkPoint.undo.xlogid, checkPoint.undo.xrecoff,
wasShutdown ? "TRUE" : "FALSE")));
ereport(LOG,
(errmsg("next transaction ID: %u; next OID: %u",
checkPoint.nextXid, checkPoint.nextOid)));
(errmsg("next transaction ID: %u/%u; next OID: %u",
checkPoint.nextXidEpoch, checkPoint.nextXid,
checkPoint.nextOid)));
ereport(LOG,
(errmsg("next MultiXactId: %u; next MultiXactOffset: %u",
checkPoint.nextMulti, checkPoint.nextMultiOffset)));
......@@ -5135,6 +5139,10 @@ StartupXLOG(void)
/* start the archive_timeout timer running */
XLogCtl->Write.lastSegSwitchTime = ControlFile->time;
/* initialize shared-memory copy of latest checkpoint XID/epoch */
XLogCtl->ckptXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
XLogCtl->ckptXid = ControlFile->checkPointCopy.nextXid;
/* Start up the commit log and related stuff, too */
StartupCLOG();
StartupSUBTRANS(oldestActiveXID);
......@@ -5364,6 +5372,46 @@ GetRecentNextXid(void)
return ControlFile->checkPointCopy.nextXid;
}
/*
* GetNextXidAndEpoch - get the current nextXid value and associated epoch
*
* This is exported for use by code that would like to have 64-bit XIDs.
* We don't really support such things, but all XIDs within the system
* can be presumed "close to" the result, and thus the epoch associated
* with them can be determined.
*/
void
GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch)
{
uint32 ckptXidEpoch;
TransactionId ckptXid;
TransactionId nextXid;
/* Must read checkpoint info first, else have race condition */
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck);
ckptXidEpoch = xlogctl->ckptXidEpoch;
ckptXid = xlogctl->ckptXid;
SpinLockRelease(&xlogctl->info_lck);
}
/* Now fetch current nextXid */
nextXid = ReadNewTransactionId();
/*
* nextXid is certainly logically later than ckptXid. So if it's
* numerically less, it must have wrapped into the next epoch.
*/
if (nextXid < ckptXid)
ckptXidEpoch++;
*xid = nextXid;
*epoch = ckptXidEpoch;
}
/*
* This must be called ONCE during postmaster or standalone-backend shutdown
*/
......@@ -5531,6 +5579,11 @@ CreateCheckPoint(bool shutdown, bool force)
checkPoint.nextXid = ShmemVariableCache->nextXid;
LWLockRelease(XidGenLock);
/* Increase XID epoch if we've wrapped around since last checkpoint */
checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch;
if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid)
checkPoint.nextXidEpoch++;
LWLockAcquire(OidGenLock, LW_SHARED);
checkPoint.nextOid = ShmemVariableCache->nextOid;
if (!shutdown)
......@@ -5600,6 +5653,17 @@ CreateCheckPoint(bool shutdown, bool force)
UpdateControlFile();
LWLockRelease(ControlFileLock);
/* Update shared-memory copy of checkpoint XID/epoch */
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
SpinLockAcquire(&xlogctl->info_lck);
xlogctl->ckptXidEpoch = checkPoint.nextXidEpoch;
xlogctl->ckptXid = checkPoint.nextXid;
SpinLockRelease(&xlogctl->info_lck);
}
/*
* We are now done with critical updates; no need for system panic if we
* have trouble while fooling with offline log segments.
......@@ -5803,6 +5867,10 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
MultiXactSetNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
/*
* TLI may change in a shutdown checkpoint, but it shouldn't decrease
*/
......@@ -5836,6 +5904,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
}
MultiXactAdvanceNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;
ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
/* TLI should not change in an on-line checkpoint */
if (checkPoint.ThisTimeLineID != ThisTimeLineID)
ereport(PANIC,
......@@ -5861,10 +5934,11 @@ xlog_desc(StringInfo buf, uint8 xl_info, char *rec)
CheckPoint *checkpoint = (CheckPoint *) rec;
appendStringInfo(buf, "checkpoint: redo %X/%X; undo %X/%X; "
"tli %u; xid %u; oid %u; multi %u; offset %u; %s",
"tli %u; xid %u/%u; oid %u; multi %u; offset %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
checkpoint->ThisTimeLineID, checkpoint->nextXid,
checkpoint->ThisTimeLineID,
checkpoint->nextXidEpoch, checkpoint->nextXid,
checkpoint->nextOid,
checkpoint->nextMulti,
checkpoint->nextMultiOffset,
......
......@@ -6,7 +6,7 @@
* copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
* licence: BSD
*
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.30 2006/08/07 16:57:56 tgl Exp $
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.31 2006/08/21 16:16:31 tgl Exp $
*/
#include "postgres.h"
......@@ -177,7 +177,8 @@ main(int argc, char *argv[])
ControlFile.checkPointCopy.undo.xrecoff);
printf(_("Latest checkpoint's TimeLineID: %u\n"),
ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's NextXID: %u\n"),
printf(_("Latest checkpoint's NextXID: %u/%u\n"),
ControlFile.checkPointCopy.nextXidEpoch,
ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile.checkPointCopy.nextOid);
......
......@@ -23,7 +23,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.51 2006/08/07 16:57:56 tgl Exp $
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.52 2006/08/21 16:16:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -71,6 +71,7 @@ main(int argc, char *argv[])
int c;
bool force = false;
bool noupdate = false;
uint32 set_xid_epoch = -1;
TransactionId set_xid = 0;
Oid set_oid = 0;
MultiXactId set_mxid = 0;
......@@ -104,7 +105,7 @@ main(int argc, char *argv[])
}
while ((c = getopt(argc, argv, "fl:m:no:O:x:")) != -1)
while ((c = getopt(argc, argv, "fl:m:no:O:x:e:")) != -1)
{
switch (c)
{
......@@ -116,6 +117,21 @@ main(int argc, char *argv[])
noupdate = true;
break;
case 'e':
set_xid_epoch = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -e\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
if (set_xid_epoch == -1)
{
fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname);
exit(1);
}
break;
case 'x':
set_xid = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
......@@ -271,6 +287,9 @@ main(int argc, char *argv[])
* Adjust fields if required by switches. (Do this now so that printout,
* if any, includes these values.)
*/
if (set_xid_epoch != -1)
ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch;
if (set_xid != 0)
ControlFile.checkPointCopy.nextXid = set_xid;
......@@ -441,6 +460,7 @@ GuessControlValues(void)
ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
ControlFile.checkPointCopy.ThisTimeLineID = 1;
ControlFile.checkPointCopy.nextXidEpoch = 0;
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
......@@ -513,29 +533,50 @@ PrintControlValues(bool guessed)
snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
ControlFile.system_identifier);
printf(_("pg_control version number: %u\n"), ControlFile.pg_control_version);
printf(_("Catalog version number: %u\n"), ControlFile.catalog_version_no);
printf(_("Database system identifier: %s\n"), sysident_str);
printf(_("Current log file ID: %u\n"), ControlFile.logId);
printf(_("Next log file segment: %u\n"), ControlFile.logSeg);
printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti);
printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign);
printf(_("pg_control version number: %u\n"),
ControlFile.pg_control_version);
printf(_("Catalog version number: %u\n"),
ControlFile.catalog_version_no);
printf(_("Database system identifier: %s\n"),
sysident_str);
printf(_("Current log file ID: %u\n"),
ControlFile.logId);
printf(_("Next log file segment: %u\n"),
ControlFile.logSeg);
printf(_("Latest checkpoint's TimeLineID: %u\n"),
ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's NextXID: %u/%u\n"),
ControlFile.checkPointCopy.nextXidEpoch,
ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"),
ControlFile.checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"),
ControlFile.checkPointCopy.nextMulti);
printf(_("Latest checkpoint's NextMultiOffset: %u\n"),
ControlFile.checkPointCopy.nextMultiOffset);
printf(_("Maximum data alignment: %u\n"),
ControlFile.maxAlign);
/* we don't print floatFormat since can't say much useful about it */
printf(_("Database block size: %u\n"), ControlFile.blcksz);
printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
printf(_("WAL block size: %u\n"), ControlFile.xlog_blcksz);
printf(_("Bytes per WAL segment: %u\n"), ControlFile.xlog_seg_size);
printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
printf(_("Maximum columns in an index: %u\n"), ControlFile.indexMaxKeys);
printf(_("Database block size: %u\n"),
ControlFile.blcksz);
printf(_("Blocks per segment of large relation: %u\n"),
ControlFile.relseg_size);
printf(_("WAL block size: %u\n"),
ControlFile.xlog_blcksz);
printf(_("Bytes per WAL segment: %u\n"),
ControlFile.xlog_seg_size);
printf(_("Maximum length of identifiers: %u\n"),
ControlFile.nameDataLen);
printf(_("Maximum columns in an index: %u\n"),
ControlFile.indexMaxKeys);
printf(_("Date/time type storage: %s\n"),
(ControlFile.enableIntTimes ? _("64-bit integers") : _("floating-point numbers")));
printf(_("Maximum length of locale name: %u\n"), ControlFile.localeBuflen);
printf(_("LC_COLLATE: %s\n"), ControlFile.lc_collate);
printf(_("LC_CTYPE: %s\n"), ControlFile.lc_ctype);
printf(_("Maximum length of locale name: %u\n"),
ControlFile.localeBuflen);
printf(_("LC_COLLATE: %s\n"),
ControlFile.lc_collate);
printf(_("LC_CTYPE: %s\n"),
ControlFile.lc_ctype);
}
......@@ -810,6 +851,7 @@ usage(void)
printf(_(" -o OID set next OID\n"));
printf(_(" -O OFFSET set next multitransaction offset\n"));
printf(_(" -x XID set next transaction ID\n"));
printf(_(" -e XIDEPOCH set next transaction ID epoch\n"));
printf(_(" --help show this help, then exit\n"));
printf(_(" --version output version information, then exit\n"));
printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.73 2006/08/17 23:04:08 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.74 2006/08/21 16:16:31 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
......@@ -166,5 +166,6 @@ extern void CreateCheckPoint(bool shutdown, bool force);
extern void XLogPutNextOid(Oid nextOid);
extern XLogRecPtr GetRedoRecPtr(void);
extern TransactionId GetRecentNextXid(void);
extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
#endif /* XLOG_H */
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.31 2006/08/07 16:57:57 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.32 2006/08/21 16:16:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -22,7 +22,7 @@
/* Version identifier for this pg_control format */
#define PG_CONTROL_VERSION 821
#define PG_CONTROL_VERSION 822
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
......@@ -36,6 +36,7 @@ typedef struct CheckPoint
* transaction when we started (i.e. UNDO end
* point) */
TimeLineID ThisTimeLineID; /* current TLI */
uint32 nextXidEpoch; /* higher-order bits of nextXid */
TransactionId nextXid; /* next free XID */
Oid nextOid; /* next free OID */
MultiXactId nextMulti; /* next free MultiXactId */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment